moved scrapers into app_dir events
90
events/scrapers/Templates/TemplateScraper.py
Normal file
@@ -0,0 +1,90 @@
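# Template scraper: registers the venue as an Organization plus a Scraper record,
# then get_events() walks the page's li.event nodes, builds an event dict
# (title, date, dateStamp, link) and hands it to createBasicEvent(). The run
# environment passed to getBrowser() is taken from the first CLI argument.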
import os, sys
from datetime import datetime
from dateutil import relativedelta

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from events.models import Organization, Scraper
from events.digitools import getBrowser, createURL, createBasicEvent, getSource

count = 0

venue, created = Organization.objects.get_or_create(
    name="Venue Name",
    city="Minneapolis",
    website="Event Website",
)

try:
    scraper, created = Scraper.objects.get_or_create(
        name=venue.name,
        website=venue.website,
        items = 0,
        last_ran = datetime.now(),
    )
except Exception as e:
    print(e)
    scraper = Scraper.objects.get(name=venue.name)
print("Scraper: ", scraper)

event_type = ""

# Time Signatures
tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%b %d %Y %I:%M %p'
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'

def get_events(ps, event_type):
    contents = ps.xpath('.//*/li[@class="event"]')
    for c in contents:
        try:
            event = {}
            day = c.xpath('.//*/span[@class="day"]/text()')[0]
            month = c.xpath('.//*/span[@class="mth"]/text()')[0]
            year = datetime.now().year
            if month == "Jan":
                year = int(year) + 1
            event['calendar'] = venue.calendar
            event['title'] = c.xpath('.//*/span[@class="event_title"]/a/text()')[0]
            event['date'] = [month, day, str(year), c.xpath('.//*/span[@class="event_time"]/text()')[0].strip()]
            event['date'] = " ".join(event['date'])
            event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
            event['link'] = c.xpath('.//*/span[@class="event_title"]/a/@href')[0]
            print("Event Dict Created")
            createBasicEvent(event, event_type, venue)
            scraper.items+=1
        except Exception as e:
            print(e)
            ppr(event)
            print("\n\n+++\n\n")

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = getBrowser(arg1)
else:
    print("No run_env")
    quit()

ps = getSource(br, venue.website)
get_events(ps, "Ed")
sleep(3)


scraper.save()

# Get Event Page Link(s)
# links = createURL("https://acmecomedycompany.com/the-club/calendar/")

# for link in links:
#     ps = getSource(br, link)
#     get_events(ps, "Ed")
#     sleep(3)

br.close()
67
events/scrapers/Templates/TemplateScraper.py.bak
Normal file
@@ -0,0 +1,67 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta

import django
sys.path.append('../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from events.models import Organization
from events.digitools import getBrowser, createURL, createBasicEvent, getSource

venue, created = Organization.objects.get_or_create(
    name="Venue Name",
    city="Minneapolis",
    website="Event Website",
)

event_type = ""

# Time Signatures
tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%b %d %Y %I:%M %p'
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'

def get_events(ps, event_type):
    contents = ps.xpath('.//*/li[@class="event"]')
    for c in contents:
        try:
            event = {}
            day = c.xpath('.//*/span[@class="day"]/text()')[0]
            month = c.xpath('.//*/span[@class="mth"]/text()')[0]
            year = datetime.now().year
            if month == "Jan":
                year = int(year) + 1
            event['title'] = c.xpath('.//*/span[@class="event_title"]/a/text()')[0]
            event['date'] = [month, day, str(year), c.xpath('.//*/span[@class="event_time"]/text()')[0].strip()]
            event['date'] = " ".join(event['date'])
            event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
            event['link'] = c.xpath('.//*/span[@class="event_title"]/a/@href')[0]
            print("Event Dict Created")
            createBasicEvent(event, event_type, venue)
        except Exception as e:
            print(e)
            ppr(event)
            print("\n\n+++\n\n")

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = getBrowser(arg1)
else:
    print("No run_env")
    quit()

# Get Event Page Link(s)
links = createURL("https://acmecomedycompany.com/the-club/calendar/")

for link in links:
    ps = getSource(br, link)
    get_events(ps, "Ed")
    sleep(3)

br.close()
84
events/scrapers/Templates/ical_run.py.template
Normal file
@@ -0,0 +1,84 @@
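# iCal template: fetches one of the Google Calendar ICS feeds listed in
# calendar_url, walks the parsed components, converts DTSTART/DTEND/DTSTAMP to
# datetimes, and stores each summarized event via DSEvent.objects.update_or_create().
# A sample parsed event is shown in the commented block at the bottom of the file.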
import requests, os, sys
from icalendar import Calendar as iCalendar, Event

from pprint import pprint as ppr
import pytz

import django
sys.path.append('../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from events.models import Event as DSEvent, Organization

venue, created = Organization.objects.get_or_create(
    name="location",
    city="Minneapolis",
    website="",
)

event_type = ""

calendar_url = [
    'https://calendar.google.com/calendar/ical/js94epu90r2et31aopons1ifm8%40group.calendar.google.com/public/basic.ics',
    'https://calendar.google.com/calendar/ical/6rpooudjg01vc8bjek1snu2ro0%40group.calendar.google.com/public/basic.ics',
    'https://calendar.google.com/calendar/ical/teflgutelllvla7r6vfcmjdjjo%40group.calendar.google.com/public/basic.ics'
]

objIcalData = requests.get(calendar_url[1])

gcal = iCalendar.from_ical(objIcalData.text)

cfpa_events = []
tz = pytz.timezone("US/Central")

for component in gcal.walk():
    event = {}
    event['strSummary'] = f"{(component.get('SUMMARY'))}"
    event['strDesc'] = component.get('DESCRIPTION')
    event['strLocation'] = component.get('LOCATION')
    event['dateStart'] = component.get('DTSTART')
    event['dateStamp'] = component.get('DTSTAMP')
    if event['dateStamp'] is not None:
        event['dateStamp'] = event['dateStamp'].dt
    if event['dateStart'] is not None:
        try:
            event['dateStart'] = event['dateStart'].dt.astimezone(pytz.utc)
        except Exception as e:
            event['dateStart'] = event['dateStart'].dt

    event['dateEnd'] = (component.get('DTEND'))
    if event['dateEnd'] is not None:
        event['dateEnd'] = event['dateEnd'].dt
    else:
        event['dateEnd'] = event['dateStart']
    if event['strSummary'] != 'None':
        event['details'] = {
            "description" : event['strDesc'],
            "DateTime" : event['dateStart'],
            "Location" : event['strLocation'],
        }
        cfpa_events.append(event)
        new_event = DSEvent.objects.update_or_create(
            event_type = event_type,
            show_title = event['strSummary'],
            show_link = event['link'],  # note: 'link' is never populated above; set it before using this template
            show_date = event['dateStart'],
            show_day = event['dateStart'].date(),
            more_details = event["details"],
            venue = venue
        )



# {'dateEnd': datetime.datetime(2022, 10, 22, 18, 30, tzinfo=<UTC>),
#  'dateStamp': datetime.datetime(2023, 3, 23, 1, 57, 45, tzinfo=<UTC>),
#  'dateStart': datetime.datetime(2022, 10, 22, 17, 30, tzinfo=<UTC>),
#  'details': {'DateTime': datetime.datetime(2022, 10, 22, 17, 30, tzinfo=<UTC>),
#              'Location': vText('b'''),
#              'description': None},
#  'strDesc': None,
#  'strLocation': vText('b'''),
#  'strSummary': 'Nia Class with Beth Giles'}
84
events/scrapers/Templates/ical_template.py
Normal file
@@ -0,0 +1,84 @@
import requests, os, sys
from icalendar import Calendar as iCalendar, Event

from pprint import pprint as ppr
import pytz

import django
sys.path.append('../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from events.models import Event as DSEvent, Organization

venue, created = Organization.objects.get_or_create(
    name="location",
    city="Minneapolis",
    website="",
)

event_type = ""

calendar_url = [
    'https://calendar.google.com/calendar/ical/js94epu90r2et31aopons1ifm8%40group.calendar.google.com/public/basic.ics',
    'https://calendar.google.com/calendar/ical/6rpooudjg01vc8bjek1snu2ro0%40group.calendar.google.com/public/basic.ics',
    'https://calendar.google.com/calendar/ical/teflgutelllvla7r6vfcmjdjjo%40group.calendar.google.com/public/basic.ics'
]

objIcalData = requests.get(calendar_url[1])

gcal = iCalendar.from_ical(objIcalData.text)

cfpa_events = []
tz = pytz.timezone("US/Central")

for component in gcal.walk():
    event = {}
    event['strSummary'] = f"{(component.get('SUMMARY'))}"
    event['strDesc'] = component.get('DESCRIPTION')
    event['strLocation'] = component.get('LOCATION')
    event['dateStart'] = component.get('DTSTART')
    event['dateStamp'] = component.get('DTSTAMP')
    if event['dateStamp'] is not None:
        event['dateStamp'] = event['dateStamp'].dt
    if event['dateStart'] is not None:
        try:
            event['dateStart'] = event['dateStart'].dt.astimezone(pytz.utc)
        except Exception as e:
            event['dateStart'] = event['dateStart'].dt

    event['dateEnd'] = (component.get('DTEND'))
    if event['dateEnd'] is not None:
        event['dateEnd'] = event['dateEnd'].dt
    else:
        event['dateEnd'] = event['dateStart']
    if event['strSummary'] != 'None':
        event['details'] = {
            "description" : event['strDesc'],
            "DateTime" : event['dateStart'],
            "Location" : event['strLocation'],
        }
        cfpa_events.append(event)
        new_event = DSEvent.objects.update_or_create(
            event_type = event_type,
            show_title = event['strSummary'],
            show_link = event['link'],  # note: 'link' is never populated above; set it before using this template
            show_date = event['dateStart'],
            show_day = event['dateStart'].date(),
            more_details = event["details"],
            venue = venue
        )



# {'dateEnd': datetime.datetime(2022, 10, 22, 18, 30, tzinfo=<UTC>),
#  'dateStamp': datetime.datetime(2023, 3, 23, 1, 57, 45, tzinfo=<UTC>),
#  'dateStart': datetime.datetime(2022, 10, 22, 17, 30, tzinfo=<UTC>),
#  'details': {'DateTime': datetime.datetime(2022, 10, 22, 17, 30, tzinfo=<UTC>),
#              'Location': vText('b'''),
#              'description': None},
#  'strDesc': None,
#  'strLocation': vText('b'''),
#  'strSummary': 'Nia Class with Beth Giles'}
102
events/scrapers/Working/cals/MplStpMag.mn.py
Normal file
@@ -0,0 +1,102 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from events.models import Organization
from events.digitools import getBrowser, createURL, createBasicEvent, getSource

venue, created = Organization.objects.get_or_create(
    name="Mpls Stp Mag",
    city="Minneapolis",
    website="https://calendar.mspmag.com/calendars/all-events/",
)

event_type = ""

# Time Signatures
tz = pytz.timezone("US/Central")
td = relativedelta.relativedelta(days=1)
fortnight = relativedelta.relativedelta(days=14)
odt = datetime.now() + fortnight

# DATETIME_FORMAT = '%b %d %Y %I:%M %p'
DATETIME_FORMAT = '%A, %B %d %Y %I:%M%p'
DATETIME_FORMAT_ALT = '%A, %B %d %Y'

def get_events(ps, event_type):
    contents = ps.xpath('.//*/div[@class="css-card js-card day-card type-smad expandable"]')
    for c in contents:
        try:
            event = {}
            event['calendar'] = venue.calendar
            event_block = c.xpath('.//*/li[@class="card-listings-item event-element"]')
            date = c.xpath('.//div[@class="day-card__header day-card__header--daily"]/text()')[0].replace("\n", "").strip()
            if date == "Today":
                date = datetime.today()
            elif date == "Tomorrow":
                date = datetime.today() + td
            # month = c.xpath('.//*/span[@class="mth"]/text()')[0]
            year = datetime.now().year
            # if month == "Jan":
            #     year = int(year) + 1
            dateTime = datetime.strptime(date + " " + str(year), DATETIME_FORMAT_ALT)
            if dateTime > odt:
                print("DATE TIME ", dateTime)
                break
            for ev in event_block:
                time = ev.xpath('.//*/span[@class="card-listing-item-time"]/text()')[0].replace("@", "").strip()
                if time == "All day":
                    time = "12:00pm"
                    event['title'] = ev.xpath('.//*/div[@class="card-listing-item-title"]/text()')[0] + " (Check link for times.)"
                elif "-" in time:
                    time = time.split("-")[0]
                    event['title'] = ev.xpath('.//*/div[@class="card-listing-item-title"]/text()')[0]
                else:
                    event['title'] = ev.xpath('.//*/div[@class="card-listing-item-title"]/text()')[0]

                event['location'] = ev.xpath('.//*/span[@class="card-listing-item-location"]/text()')[0]
                if event['location'] == '7th St. Entry':
                    event['location'] = '7th St Entry'
                elif event['location'] == '7th Street Entry':
                    event['location'] = '7th St Entry'
                elif event['location'] == 'Amsterdam Bar and Hall':
                    event['location'] = 'Amsterdam Bar & Hall'
                new_venue, created = Organization.objects.get_or_create(name=event['location'])
                print("V: ", new_venue, created)

                event['dateTime'] = date + " " + str(year) + " " + time
                event['link'] = venue.website + c.xpath('.//@data-event')[0]
                event['dateStamp'] = datetime.strptime(event['dateTime'], DATETIME_FORMAT)

                createBasicEvent(event, event_type, new_venue)
        except Exception as e:
            print(e)
            ppr(event)
            print("\n\n+++\n\n")

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = getBrowser(arg1)
else:
    print("No run_env")
    quit()

# Get Event Page Link(s)
# links = createURL("https://acmecomedycompany.com/the-club/calendar/")

ps = getSource(br, venue.website)
get_events(ps, "Ed")
sleep(3)

br.close()
105
events/scrapers/Working/cals/minnestar.py
Normal file
@@ -0,0 +1,105 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from events.models import Organization, Scraper
from events.digitools import getBrowser, createURL, createBasicEvent, getSource

count = 0

venue, created = Organization.objects.get_or_create(
    name="Minnestar",
    city="Minneapolis",
    website="https://minnestar.org/community/calendar",
)

try:
    scraper, created = Scraper.objects.get_or_create(
        name=venue.name,
        website=venue.website,
        items = 0,
        last_ran = datetime.now(),
    )
except Exception as e:
    print(e)
    scraper = Scraper.objects.get(name=venue.name)
print("Scraper: ", scraper)

event_type = ""

# Time Signatures
tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%B %d, %Y %I:%M %p'
DATETIME_FORMAT_2 = '%B %d %Y'

def get_events(ps, event_type):
    links = ps.xpath('.//*/div[@id="community-calendar-list-view-container"]/a/@href')
    ppr(links)
    for l in links:
        pse = getSource(br, l)
        sleep(1)
        event = {}
        event['calendar'] = venue.calendar
        event['link'] = l
        try:
            starttime = pse.xpath('.//*/time/text()')[0]
            endtime = pse.xpath('.//*/time/@datetime')[1]
            event['dateStamp'] = datetime.strptime(starttime, DATETIME_FORMAT)
            event['title'] = pse.xpath('.//*/h1[@class="heading-2"]/text()')[0]
            # event['detail-headers'] = pse.xpath('.//*/ul[@class="eo-event-meta"]/li/strong/text()')
            # event['details'] = pse.xpath('.//*/ul[@class="eo-event-meta"]/li/text()')

        except:
            try:
                event['title'] = pse.xpath('.//*/h1[@class="heading-2"]/text()')[0]
                starttime = pse.xpath('.//*/time/text()')[0]
                event['dateStamp'] = datetime.strptime(starttime, DATETIME_FORMAT)
            except Exception as e:
                try:
                    print(e)
                    print('failed event: ', event)
                    starttime = pse.xpath('.//*/time/text()')[0]
                    event['dateStamp'] = datetime.strptime(starttime + ' 2025', DATETIME_FORMAT_2)
                except Exception as e:
                    print(e)
                    print("failed event: ", event)
        ppr(event)
        try:
            createBasicEvent(event, "Ot", venue)
            scraper.items+=1
        except Exception as e:
            print(e)
            print('failed to create: ', event)


if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = getBrowser(arg1)
else:
    print("No run_env")
    quit()

ps = getSource(br, venue.website)
get_events(ps, "Ot")
sleep(3)

scraper.save()

# Get Event Page Link(s)
# links = createURL("https://acmecomedycompany.com/the-club/calendar/")

# for link in links:
#     ps = getSource(br, link)
#     get_events(ps, "Ed")
#     sleep(3)

br.close()
147
events/scrapers/Working/govt/MNLeg.py
Normal file
@@ -0,0 +1,147 @@
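# MN Legislature scraper: loads the combined calendar page, splits the cards into
# committee, Senate, and House items by CSS class, and writes each one as a 'Gv'
# Event with the room/location pulled from the card text.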
# Install Chromedriver and Quarantine
# xattr -d com.apple.quarantine <name-of-executable>

import os, sys
from datetime import datetime
from dateutil import relativedelta

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
from selenium import webdriver as wd

from xvfbwrapper import Xvfb
from lxml import html
import pytz

from events.models import Event, Organization, Scraper
from events.digitools import getBrowser, createURL, createBasicEvent, getSource

try:
    scraper, created = Scraper.objects.get_or_create(
        name="MN Legislature",
        website="https://www.leg.mn.gov/cal?type=all",
        items = 0,
        last_ran = datetime.now(),
    )
except Exception as e:
    print(e)
    scraper = Scraper.objects.get(name="MN Legislature")
print("Scraper: ", scraper)

tz = pytz.timezone("US/Central")

DATETIME_FORMAT = '%A, %B %d, %Y %I:%M %p'

# Set initial variables for City, etc
calendar_url = 'https://www.leg.mn.gov/cal?type=all'

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = getBrowser(arg1)
else:
    print("No run_env")
    quit()

br.get(calendar_url)
sleep(60)
ps = html.fromstring(br.page_source)

commEvents = ps.xpath('.//*/div[@class="card border-dark comm_item cal_item ml-lg-3"]')
senateEvents = ps.xpath('.//*/div[@class="card border-dark senate_item cal_item ml-lg-3"]')
houseEvents = ps.xpath('.//*/div[@class="card border-dark house_item cal_item ml-lg-3"]')
meetings = []

for hE in houseEvents:
    details = {}
    dateTime = hE.xpath('.//*/b/text()')[0]
    try:
        title = hE.xpath('.//*/h3/a/text()')[0]
    except:
        title = hE.xpath('.//*/h3/text()')[0]
    try:
        link = "https://www.leg.mn.gov/" + hE.xpath('.//*/div[@class="float-right text-center mr-2 d-print-none"]/a/@href')[0]
    except:
        link = hE.xpath('.//*/h3/a/@href')[0]
    details['location'] = hE.xpath('.//*/div[@class=""]/text()')[0]
    # print(dateTime, title, link, details['location'])
    venue, created = Organization.objects.get_or_create(name="MN House", city="St. Paul")
    new_event, created = Event.objects.update_or_create(
        calendar = 'msp',
        event_type = 'Gv',
        show_title = title,
        show_link = link,
        show_date = datetime.strptime(dateTime, DATETIME_FORMAT),
        show_day = datetime.strptime(dateTime, DATETIME_FORMAT).date(),
        more_details = details['location'],
        venue = venue
    )
    scraper.items+=1


for sE in senateEvents:
    details = {}
    dateTime = sE.xpath('.//*/b/text()')[0]
    try:
        title = sE.xpath('.//*/h3/a/text()')[0]
    except:
        title = sE.xpath('.//*/h3/text()')[0]
    try:
        link = "https://www.leg.mn.gov/" + sE.xpath('.//*/div[@class="float-right text-center mr-2"]/a/@href')[0]
    except:
        link = sE.xpath('.//*/h3/a/@href')[0]
    location_list = sE.xpath('.//*/text()')
    if 'Location: ' in location_list:
        iN = location_list.index("Location: ")
        details['location'] = location_list[iN + 1]
    elif 'Senate Floor Session' in location_list:
        details['location'] = 'Senate Floor Session'
    venue, created = Organization.objects.get_or_create(name="MN Senate", city="St. Paul")
    new_event = Event.objects.update_or_create(
        event_type = 'Gv',
        show_title = title,
        show_link = link,
        show_date = datetime.strptime(dateTime, DATETIME_FORMAT),
        show_day = datetime.strptime(dateTime, DATETIME_FORMAT).date(),
        more_details = details['location'],
        venue = venue
    )
    scraper.items+=1

for cE in commEvents:
    details = {}
    dateTime = cE.xpath('.//*/b/text()')[0]
    try:
        title = cE.xpath('.//*/h3/a/text()')[0]
    except:
        title = cE.xpath('.//*/h3/text()')[0]
    try:
        link = "https://www.leg.mn.gov/" + cE.xpath('.//*/div[@class="float-right text-center mr-2"]/a/@href')[0]
    except:
        link = cE.xpath('.//*/h3/a/@href')[0]
    location_list = cE.xpath('.//*/text()')
    if 'Room: ' in location_list:
        iN = location_list.index("Room: ")
        details['location'] = location_list[iN + 1]
    # print(dateTime, title, link, details['location'])
    venue, created = Organization.objects.get_or_create(name="MN Legislature", city="St. Paul")
    new_event = Event.objects.update_or_create(
        event_type = 'Gv',
        show_title = title,
        show_link = link,
        show_date = datetime.strptime(dateTime, DATETIME_FORMAT),
        show_day = datetime.strptime(dateTime, DATETIME_FORMAT).date(),
        more_details = details['location'],
        venue = venue
    )
    scraper.items+=1


br.close()
scraper.save()

# br.find_element_by_class_name('fc-btn_allCalendars-button').click()
99
events/scrapers/Working/govt/MplsCityCouncil.py
Normal file
@@ -0,0 +1,99 @@
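# Mpls City Council scraper: reads the LIMS "upcoming" calendar, iterates the
# per-day blocks, and derives the meeting title and location from the link text
# (the location is the segment after the final comma).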
import re, os, sys
from datetime import datetime

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from events.models import Event, Organization, Scraper

from time import sleep
from pprint import pprint as ppr
from selenium import webdriver as wd
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By

from xvfbwrapper import Xvfb
from lxml import html
import pytz
from events.digitools import getBrowser, createURL, createBasicEvent, getSource

try:
    scraper, created = Scraper.objects.get_or_create(
        name="Mpls City Council",
        website="https://lims.minneapolismn.gov/Calendar/citycouncil/upcoming",
        items = 0,
        last_ran = datetime.now(),
    )
except Exception as e:
    print(e)
    scraper = Scraper.objects.get(name="Mpls City Council")
print("Scraper: ", scraper)

tz = pytz.timezone("US/Central")

DATETIME_FORMAT = '%A, %b %d, %Y %I:%M %p'

calendar_url = 'https://lims.minneapolismn.gov/Calendar/citycouncil/upcoming'

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = getBrowser(arg1)
else:
    print("No run_env")
    quit()

br.get(calendar_url)
sleep(25)
# br.find_element(By.XPATH, '//*/li[@class="tab-header-small"]/a').click()
# sleep(15)
# all_entries = Select(br.find_element(By.XPATH, '//*/select'))
# all_entries.select_by_value('50')
# sleep(15)

ps = html.fromstring(br.page_source)

dayBlocks = ps.xpath('.//*/div[@class="ng-scope"]')
meetings = []

for dB in dayBlocks:
    date = dB.xpath('.//div[@class="row"]/div/span[@class="ng-binding"]/text()')[0]
    events = dB.xpath('.//div[@class="upcoming ng-scope"]/div')
    for event in events:
        time = event.xpath('.//div/text()')[0]
        title = event.xpath('.//div/a/text()')[0].strip()
        if not len(title) > 0:
            title = event.xpath('.//div/span/a/text()')[0].strip()
        link = event.xpath('.//div/a/@href')[0]
        if link.startswith("/Download/"):
            link = calendar_url
        else:
            link = "https://lims.minneapolismn.gov" + link
        location = title.split(',')[-1].strip()
        mtg_title = title.split(',')[:-1]
        if len(mtg_title) > 1:
            mtg_title = (' -').join(mtg_title).strip()
        else:
            mtg_title = mtg_title[0].strip()
        dateTime = datetime.strptime(date + " " + time, DATETIME_FORMAT)
        if location == "City Hall":
            location = "Mpls City Hall"
        print(dateTime, location, mtg_title, link)
        print('\n\n++++\n\n')
        venue, created = Organization.objects.get_or_create(name=location, city="Minneapolis")
        new_event = Event.objects.update_or_create(
            calendar = 'msp',
            event_type = 'Gv',
            show_title = mtg_title,
            show_link = link,
            show_date = dateTime,
            show_day = dateTime,
            venue = venue
        )
        scraper.items+=1


br.close()
scraper.save()
# br.find_element_by_class_name('fc-btn_allCalendars-button').click()
73
events/scrapers/Working/govt/StPaulCityCouncil.py
Normal file
@@ -0,0 +1,73 @@
import re, os, sys
from datetime import datetime

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from events.models import Event, Organization, Scraper

from time import sleep
from pprint import pprint as ppr
from selenium import webdriver as wd
from selenium.webdriver.common.by import By

from xvfbwrapper import Xvfb
from lxml import html
import pytz
from events.digitools import getBrowser, createURL, createBasicEvent, getSource

scraper, created = Scraper.objects.get_or_create(
    name="St Paul City Council",
    website="https://www.stpaul.gov/calendar",
    last_ran = datetime.now(),
)

tz = pytz.timezone("US/Central")

DATETIME_FORMAT = '%B %d, %Y at %I:%M %p'

calendar_url = 'https://www.stpaul.gov/calendar'
city_site = "https://www.stpaul.gov"

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = getBrowser(arg1)
else:
    print("No run_env")
    quit()

br.get(calendar_url)
sleep(3)


def getEvents(br):
    ps = html.fromstring(br.page_source)
    eventBlocks = ps.xpath('.//*/div[@class="calendar__item views-row"]')

    for eB in eventBlocks:
        title = eB.xpath('.//div/h3[@class="field-content calendar__title"]/text()')[0]
        link = city_site + eB.xpath('.//div/span[@class="field-content calendar__link"]/a/@href')[0]
        dateTime = eB.xpath('.//div[@class="views-field views-field-field-calendar-date-value"]/span/text()')[0]
        print(dateTime, title, link)
        print('\n\n++++\n\n')
        venue, created = Organization.objects.get_or_create(name="Somewhere in St Paul", city="St. Paul")
        new_event = Event.objects.update_or_create(
            calendar = 'msp',
            event_type = 'Gv',
            show_title = title,
            show_link = link,
            show_date = datetime.strptime(dateTime, DATETIME_FORMAT),
            show_day = datetime.strptime(dateTime, DATETIME_FORMAT),
            venue = venue
        )

getEvents(br)
sleep(5)
br.get("https://www.stpaul.gov/calendar?page=1")
getEvents(br)

br.close()

# br.find_element_by_class_name('fc-btn_allCalendars-button').click()
116
events/scrapers/Working/govt/mngov.py
Normal file
@@ -0,0 +1,116 @@
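# Launch MN scraper: downloads the Timely ICS export, keeps only events that
# start in the future (compared in US/Central), and, when the LOCATION field
# looks like "Name @ street, city", re-points the saved event at a more specific
# Organization parsed from that string.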
import requests, os, sys
from icalendar import Calendar as iCalendar, Event
from datetime import datetime
from dateutil import relativedelta
td = relativedelta.relativedelta(hours=5)

from pprint import pprint as ppr
import pytz

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from events.models import Event as DSEvent, Organization, Scraper

td = relativedelta.relativedelta(hours=5)
odt = datetime.now() + td

venue, created = Organization.objects.get_or_create(
    name="MN Launch",
    city="Minneapolis",
    website="https://mn.gov/launchmn/calendar",
)

try:
    scraper, created = Scraper.objects.get_or_create(
        name=venue.name,
        website=venue.website,
        items = 0,
        last_ran = datetime.now(),
    )
except Exception as e:
    print(e)
    scraper = Scraper.objects.get(name=venue.name)
print("Scraper: ", scraper)

event_type = "Ed"

cal_url = "https://timelyapp.time.ly/api/calendars/54705514/export?format=ics&target=copy&start_date=2024-12-13"
calendar_url = 'https://calendar.google.com/calendar/ical/uvkshlggh1h4ck08emab22btkum9hl94%40import.calendar.google.com/public/basic.ics'

objIcalData = requests.get(cal_url)

gcal = iCalendar.from_ical(objIcalData.text)

cfpa_events = []
tz = pytz.timezone("US/Central")

for component in gcal.walk():
    event = {}
    event['strSummary'] = f"{(component.get('SUMMARY'))}"
    event['strDesc'] = component.get('DESCRIPTION')
    event['strLocation'] = component.get('LOCATION')
    event['dateStart'] = component.get('DTSTART')
    event['dateStamp'] = component.get('DTSTAMP')
    if event['dateStamp'] is not None:
        event['dateStamp'] = event['dateStart'].dt
    if event['dateStart'] is not None:
        try:
            event['dateStart'] = event['dateStart'].dt
        except Exception as e:
            event['dateStart'] = event['dateStart'].dt

    event['dateEnd'] = (component.get('DTEND'))
    if event['dateEnd'] is not None:
        event['dateEnd'] = event['dateEnd'].dt
    else:
        event['dateEnd'] = event['dateStart']
    if event['strSummary'] != 'None':
        event['details'] = {
            "description" : event['strDesc'],
            "Location" : event['strLocation'],
        }
        cfpa_events.append(event)
        now_now = datetime.now().astimezone(tz)
        try:
            if event['dateStart'] > now_now:
                print(event['strSummary'])
                new_event, created = DSEvent.objects.update_or_create(
                    calendar = 'msp',
                    event_type = event_type,
                    show_title = event['strSummary'],
                    show_link = venue.website,
                    show_date = event['dateStart']-td,
                    show_day = event['dateStart']-td,
                    more_details = event["details"],
                    venue = venue
                )
                scraper.items+=1
                if event['strLocation'] != None and event['strLocation'] != 'MN' and event['strLocation'] != 'Online':
                    loc = event['strLocation'].split('@')
                    new_venue_name = loc[0]
                    if len(loc) > 1:
                        address = loc[1].split(",")
                        city = address[1].strip()
                        new_venue, created = Organization.objects.get_or_create(
                            name=new_venue_name,
                            city=city,
                            website="https://mn.gov/launchmn/calendar",
                        )
                        new_venue_created = True
                        new_event.venue = new_venue
                        new_event.save()
                else:
                    new_event.venue = venue
                    new_event.save()
        except Exception as e:
            print(e)
            print("Event: ", event['dateStart'], event['strSummary'])
            print("Clock: ", now_now)
    else:
        print("Failed: ", component.get('DESCRIPTION'))

scraper.save()
50
events/scrapers/Working/iCal/ical.CAFAC.mpls.py
Normal file
@@ -0,0 +1,50 @@
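# iCal runner for CAFAC: the per-feed boilerplate only sets up the Organization
# and Scraper rows; parsing and event creation are delegated to the shared
# digitools.getiCalEvents()/updateScraper() helpers. The other iCal runners
# below follow the same pattern.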
import requests, os, sys
from icalendar import Calendar as iCalendar, Event

from datetime import datetime
from dateutil import relativedelta
td = relativedelta.relativedelta(hours=5)

from pprint import pprint as ppr
import pytz

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from events.models import Event as DSEvent, Organization, Scraper, Calendar
import events.digitools as digitools

from dateutil import relativedelta
td = relativedelta.relativedelta(hours=5)

venue, created = Organization.objects.get_or_create(
    name="Chicago Ave Fire Arts Center",
    city="Minneapolis",
    website="https://www.cafac.org/classes",
)
event_type = "Ed"

try:
    scraper, created = Scraper.objects.get_or_create(
        name="Chicago Ave Fire Arts Center",
        website="https://calendar.google.com/calendar/ical/9qj2426rukra3jv933nslsf3r8%40group.calendar.google.com/public/basic.ics",
        calendar = Calendar.objects.get(id=1),
        items = 0,
        new_items = 0,
        last_ran = datetime.now(),
    )
except Exception as e:
    print(e)
    scraper = Scraper.objects.get(name=venue.name)

item_count_start = scraper.items

event_type = "Ed"

objIcalData = requests.get(scraper.website)
gcal = iCalendar.from_ical(objIcalData.text)
tz = pytz.timezone("US/Central")
digitools.getiCalEvents(gcal, scraper)
digitools.updateScraper(scraper, item_count_start)
44
events/scrapers/Working/iCal/ical_run.KJHideaway.StPaul.py
Normal file
@@ -0,0 +1,44 @@
import requests, os, sys
from icalendar import Calendar as iCalendar, Event
from datetime import datetime

from pprint import pprint as ppr
import pytz

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from events.models import Event as DSEvent, Organization, Scraper, Calendar
import events.digitools as digitools  # required for the getiCalEvents/updateScraper calls below
from dateutil import relativedelta
td = relativedelta.relativedelta(hours=5)

venue, created = Organization.objects.get_or_create(
    name="KJ's Hideaway",
    city="Minneapolis",
    website="",
)

try:
    scraper, created = Scraper.objects.get_or_create(
        name="KJ's Hideaway",
        website="https://calendar.google.com/calendar/ical/sgmok5t13vspeoruhruh33dhj0hgc50q%40import.calendar.google.com/public/basic.ics",
        calendar = Calendar.objects.get(id=1),
        items = 0,
        new_items = 0,
        last_ran = datetime.now(),
    )
except Exception as e:
    print(e)
    scraper = Scraper.objects.get(name=venue.name)

item_count_start = scraper.items

event_type = "Mu"

objIcalData = requests.get(scraper.website)
gcal = iCalendar.from_ical(objIcalData.text)
tz = pytz.timezone("US/Central")
digitools.getiCalEvents(gcal, scraper)
digitools.updateScraper(scraper, item_count_start)
@@ -0,0 +1,48 @@
import requests, os, sys
from icalendar import Calendar as iCalendar, Event

from datetime import datetime
from dateutil import relativedelta
td = relativedelta.relativedelta(hours=5)

from pprint import pprint as ppr
import pytz

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from events.models import Event as DSEvent, Organization, Scraper, Calendar
import events.digitools as digitools
from dateutil import relativedelta
td = relativedelta.relativedelta(hours=5)


venue, created = Organization.objects.get_or_create(
    name="Sociable Ciderwerks",
    city="Minneapolis",
    website="https://sociablecider.com/events",
)
event_type = "Mu"

try:
    scraper, created = Scraper.objects.get_or_create(
        name="Sociable Ciderwerks",
        website="https://calendar.google.com/calendar/ical/c_oa7uitvkn871o1ojl5e1os4ve8%40group.calendar.google.com/public/basic.ics",
        calendar = Calendar.objects.get(id=1),
        items = 0,
        new_items = 0,
        last_ran = datetime.now(),
    )
except Exception as e:
    print(e)
    scraper = Scraper.objects.get(name=venue.name)

item_count_start = scraper.items

objIcalData = requests.get(scraper.website)
gcal = iCalendar.from_ical(objIcalData.text)
tz = pytz.timezone("US/Central")
digitools.getiCalEvents(gcal, scraper)
digitools.updateScraper(scraper, item_count_start)
48
events/scrapers/Working/iCal/ical_run.bunkers.py
Normal file
@@ -0,0 +1,48 @@
import requests, os, sys
from icalendar import Calendar as iCalendar, Event

from pprint import pprint as ppr
import pytz

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from events.models import Event as DSEvent, Organization, Scraper, Calendar

import events.digitools as digitools

from datetime import datetime
from dateutil import relativedelta
td = relativedelta.relativedelta(hours=5)

venue, created = Organization.objects.get_or_create(
    name="Bunkers",
    city="Minneapolis",
    website="https://bunkersmusic.com/calendar/",
    is_venue = True
)

try:
    scraper, created = Scraper.objects.get_or_create(
        name="Bunkers",
        website="https://calendar.google.com/calendar/ical/js94epu90r2et31aopons1ifm8%40group.calendar.google.com/public/basic.ics",
        calendar = Calendar.objects.get(id=1),
        items = 0,
        new_items = 0,
        last_ran = datetime.now(),
    )
except Exception as e:
    print(e)
    scraper = Scraper.objects.get(name=venue.name)

item_count_start = scraper.items

event_type = "Mu"

objIcalData = requests.get(scraper.website)
gcal = iCalendar.from_ical(objIcalData.text)
tz = pytz.timezone("US/Central")
digitools.getiCalEvents(gcal, scraper)
digitools.updateScraper(scraper, item_count_start)
48
events/scrapers/Working/iCal/ical_run.cfpa.py
Normal file
@@ -0,0 +1,48 @@
import requests, os, sys
from icalendar import Calendar as iCalendar, Event

from datetime import datetime
from dateutil import relativedelta

from pprint import pprint as ppr
import pytz

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from events.models import Event as DSEvent, Organization, Scraper, Calendar
import events.digitools as digitools


td = relativedelta.relativedelta(hours=5)

venue, created = Organization.objects.get_or_create(
    name="Center for Performing Arts",
    city="Minneapolis",
    website="https://www.cfpampls.com/events",
)

try:
    scraper, created = Scraper.objects.get_or_create(
        name="Center for Performing Arts",
        website="https://calendar.google.com/calendar/ical/6rpooudjg01vc8bjek1snu2ro0%40group.calendar.google.com/public/basic.ics",
        calendar = Calendar.objects.get(id=1),
        items = 0,
        new_items = 0,
        last_ran = datetime.now(),
    )
except Exception as e:
    print(e)
    scraper = Scraper.objects.get(name=venue.name)

item_count_start = scraper.items

event_type = "Ed"

objIcalData = requests.get(scraper.website)
gcal = iCalendar.from_ical(objIcalData.text)
tz = pytz.timezone("US/Central")
digitools.getiCalEvents(gcal, scraper)
digitools.updateScraper(scraper, item_count_start)
46
events/scrapers/Working/iCal/ical_run.eagles.py
Normal file
@@ -0,0 +1,46 @@
import requests, os, sys
from icalendar import Calendar as iCalendar, Event
from datetime import datetime

from pprint import pprint as ppr
import pytz

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from events.models import Event as DSEvent, Organization, Scraper, Calendar
import events.digitools as digitools

from dateutil import relativedelta
td = relativedelta.relativedelta(hours=5)

venue, created = Organization.objects.get_or_create(
    name="Eagles #34",
    city="Minneapolis",
    website="https://www.minneapoliseagles34.org/events-entertainment.html",
)

try:
    scraper, created = Scraper.objects.get_or_create(
        name="Eagles #34",
        website="https://calendar.google.com/calendar/ical/teflgutelllvla7r6vfcmjdjjo%40group.calendar.google.com/public/basic.ics",
        calendar = Calendar.objects.get(id=1),
        items = 0,
        new_items = 0,
        last_ran = datetime.now(),
    )
except Exception as e:
    print(e)
    scraper = Scraper.objects.get(name=venue.name)

item_count_start = scraper.items

event_type = "Mu"

objIcalData = requests.get(scraper.website)
gcal = iCalendar.from_ical(objIcalData.text)
tz = pytz.timezone("US/Central")
digitools.getiCalEvents(gcal, scraper)
digitools.updateScraper(scraper, item_count_start)
50
events/scrapers/Working/iCal/ical_run.terminalbar-mpls.py
Normal file
@@ -0,0 +1,50 @@
import requests, os, sys
from icalendar import Calendar as iCalendar, Event
from datetime import datetime
from dateutil import relativedelta
td = relativedelta.relativedelta(hours=5)

from pprint import pprint as ppr
import pytz

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from events.models import Event as DSEvent, Organization, Scraper, Calendar

import events.digitools as digitools

td = relativedelta.relativedelta(hours=5)
odt = datetime.now() + td

venue, created = Organization.objects.get_or_create(
    name="Terminal Bar",
    city="Minneapolis",
    website="https://terminalbarmn.com",
)
event_type = "Mu"

try:
    scraper, created = Scraper.objects.get_or_create(
        name="Terminal Bar",
        website="https://calendar.google.com/calendar/ical/terminalbar32%40gmail.com/public/basic.ics",
        calendar = Calendar.objects.get(id=1),
        items = 0,
        new_items = 0,
        last_ran = datetime.now(),
    )
except Exception as e:
    print(e)
    scraper = Scraper.objects.get(name=venue.name)

item_count_start = scraper.items

event_type = "Mu"

objIcalData = requests.get(scraper.website)
gcal = iCalendar.from_ical(objIcalData.text)
tz = pytz.timezone("US/Central")
digitools.getiCalEvents(gcal, scraper)
digitools.updateScraper(scraper, item_count_start)
78
events/scrapers/Working/news/minnpost.mn.py
Normal file
@@ -0,0 +1,78 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from events.models import Organization, Scraper
from events.digitools import getBrowser, createURL, createBasicArticle, getSource


org, created = Organization.objects.get_or_create(
    name="MinnPost",
    city="Minneapolis",
    website="https://www.minnpost.com/",
    is_venue=False,
)

try:
    scraper, created = Scraper.objects.get_or_create(
        name=org.name,
        website=org.website,
        items = 0,
        last_ran = datetime.now(),
    )
except Exception as e:
    print(e)
    scraper = Scraper.objects.get(name=org.name)
print("Scraper: ", scraper)

event_type = "Ja"

# Time Signatures
tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%b %d %Y %I:%M %p'
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'

def get_events(ps, event_type):
    contents = ps.xpath('.//*/article')
    count = 0
    ppr(contents)
    for c in contents:
        try:
            if count > 10:
                br.close()
                quit()
            article = {}
            article['title'] = c.xpath('.//*/h2[@class="entry-title"]/a/text()')[0]
            article['link'] = c.xpath('.//*/h2[@class="entry-title"]/a/@href')[0]
            createBasicArticle(article, event_type, org)
            ppr(article)
            print("Success")
            count+=1
        except Exception as e:
            print(e)
            ppr(article)
            print("\n\n+++\n\n")

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = getBrowser(arg1)
else:
    print("No run_env")
    quit()


ps = getSource(br, org.website)
get_events(ps, "Ed")
sleep(3)

br.close()
scraper.save()
68
events/scrapers/Working/news/racket.mn.py
Normal file
@@ -0,0 +1,68 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from events.models import Organization, Scraper
from events.digitools import getBrowser, createURL, createBasicArticle, getSource

scraper, created = Scraper.objects.get_or_create(
    name="Racket MN",
    website="https://racketmn.com",
    last_ran = datetime.now(),
)

org, created = Organization.objects.get_or_create(
    name="Racket MN",
    city="Minneapolis",
    website="https://racketmn.com",
    is_venue=False,
)

event_type = "Ja"

# Time Signatures
tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%b %d %Y %I:%M %p'
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'

def get_events(ps, event_type):
    count = 0
    contents = ps.xpath('.//*/div[@class="PostCard_stackedWrapper__S21Fy"]') + ps.xpath('.//*/div[@class="PostCard_wrapper__uteO3"]')
    for c in contents:
        if count > 10:
            br.close()
            quit()
        try:
            article = {}
            article['title'] = c.xpath('.//div/a/h3/text()')[0]
            article['link'] = org.website + c.xpath('.//div/a/@href')[1]
            createBasicArticle(article, event_type, org)
            count+=1
        except Exception as e:
            print(e)
            ppr(article)
            print("\n+++\n")

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = getBrowser(arg1)
else:
    print("No run_env")
    quit()


ps = getSource(br, org.website)
get_events(ps, "Ed")
sleep(3)

br.close()
scraper.save()
68
events/scrapers/Working/news/sahan.mn.py
Normal file
@@ -0,0 +1,68 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from events.models import Organization, Scraper
from events.digitools import getBrowser, createURL, createBasicArticle, getSource

scraper, created = Scraper.objects.get_or_create(
    name="Sahan Journal",
    website="https://sahanjournal.com/",
    last_ran = datetime.now(),
)

org, created = Organization.objects.get_or_create(
    name="Sahan Journal",
    city="Minneapolis",
    website="https://sahanjournal.com/",
    is_venue=False,
)

event_type = "Ja"

# Time Signatures
tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%b %d %Y %I:%M %p'
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'

def get_events(ps, event_type):
    contents = ps.xpath('.//*/article')
    count = 0
    for c in contents:
        try:
            if count > 10:
                br.close()
                quit()
            article = {}
            article['title'] = c.xpath('.//*/h2[@class="entry-title"]/a/text()')[0]
            article['link'] = c.xpath('.//*/h2[@class="entry-title"]/a/@href')[0]
            createBasicArticle(article, event_type, org)
            count+=1
        except Exception as e:
            print(e)
            ppr(article)
            print("\n+++\n")

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = getBrowser(arg1)
else:
    print("No run_env")
    quit()


ps = getSource(br, org.website)
get_events(ps, "Ed")
sleep(3)

br.close()
scraper.save()
63
events/scrapers/Working/news/unicornriot.py
Normal file
@@ -0,0 +1,63 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from events.models import Organization, Scraper
from events.digitools import getBrowser, createURL, createBasicArticle, getSource

scraper, created = Scraper.objects.get_or_create(
    name="Unicorn Riot",
    website="https://unicornriot.ninja/",
    last_ran = datetime.now(),
)

org, created = Organization.objects.get_or_create(
    name="Unicorn Riot",
    city="Minneapolis",
    website="https://unicornriot.ninja/",
    is_venue=False,
)

event_type = "Ja"

# Time Signatures
tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%b %d %Y %I:%M %p'
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'

def get_events(ps, event_type):
    contents = ps.xpath('.//*/article')
    for c in contents[:10]:
        try:
            article = {}
            article['title'] = c.xpath('.//*/h3[@class="title entry-title is-3"]/a/text()')[0]
            article['link'] = c.xpath('.//*/h3[@class="title entry-title is-3"]/a/@href')[0]
            createBasicArticle(article, event_type, org)
        except Exception as e:
            print(e)
            ppr(article)
            print("\n+++\n")

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = getBrowser(arg1)
else:
    print("No run_env")
    quit()


ps = getSource(br, org.website)
get_events(ps, "Ed")
sleep(3)

br.close()
scraper.save()
132
events/scrapers/Working/smedia/bluesky.py
Normal file
@@ -0,0 +1,132 @@
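# Bluesky scraper: logs in with atproto, pulls up to 100 posts from the author
# feed, and normalizes three post shapes (external-link embeds, quoted-record
# embeds, and plain posts) into a flat dict that createSocialLink() stores as a
# SocialLink row.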
|
||||
import os, sys
|
||||
from datetime import datetime
|
||||
from dateutil import relativedelta
|
||||
|
||||
from atproto import Client
|
||||
|
||||
import django
|
||||
sys.path.append('../../../')
|
||||
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
|
||||
django.setup()
|
||||
|
||||
from time import sleep
|
||||
from pprint import pprint as ppr
|
||||
import pytz
|
||||
|
||||
from socials.models import SocialLink, SocialPost
|
||||
# from digitools import getBrowser, createURL, createBasicEvent, getSource
|
||||
|
||||
tz = pytz.timezone("US/Central")
|
||||
|
||||
USERNAME = "dreamfreely.org"
|
||||
PASSWORD = "Futbol21!@"
|
||||
|
||||
client = Client()
|
||||
client.login(USERNAME, PASSWORD)
|
||||
feed = client.get_author_feed(USERNAME, limit = 100)
|
||||
|
||||
def createSocialLink(post):
|
||||
new_post, created = SocialLink.objects.update_or_create(
|
||||
uri = post['uri'],
|
||||
text = post['text'],
|
||||
link = post['link'],
|
||||
handle = post['handle'],
|
||||
likes = post['likes'],
|
||||
reposts = post['reposts'],
|
||||
quotes = post['quotes'],
|
||||
replies = post['replies'],
|
||||
created_at = post['created_at'],
|
||||
platform = 'bluesky',
|
||||
rt_uri = post['rt_uri'],
|
||||
rt_text = post['rt_text'],
|
||||
rt_link = post['rt_link'],
|
||||
rt_handle = post['rt_handle'],
|
||||
)
|
||||
# print(created, new_post)
|
||||
print("completed write")
|
||||
|
||||
tweets = []
|
||||
|
||||
print(len(feed.feed))
|
||||
|
||||
for post in feed.feed:
    post = post.post
    print("\n\nNEW POST\n\n")
    # try:
    # ppr(post.embed.record.record.author.handle)
    # ppr(post.embed.record.record.value.text.split("\n")[:2])
    # ppr(post.embed.record.record.value.embed.external.uri.split("?")[0])
    # ppr(post.embed.record.record.uri.split("feed.post/")[1])
    # except:
    # pass

    if hasattr(post.record.embed, 'external'):
        p = {}
        try:
            p['link'] = post.record.embed.external.uri.split("?")[0]
        except:
            pass
        p['text'] = " ".join(post.record.text.split("\n")[:2])
        p['handle'] = post.author.handle
        p['uri'] = post.uri.split("feed.post/")[1]
        p['likes'] = post.like_count
        p['quotes'] = post.quote_count
        p['replies'] = post.reply_count
        p['reposts'] = post.repost_count
        p['created_at'] = post.record.created_at

        p['rt_handle'] = "blank"
        p['rt_text'] = "blank"
        p['rt_uri'] = "blank"
        p['rt_link'] = "blank"

    elif hasattr(post.embed, 'record'):
        p = {}
        p['text'] = " ".join(post.record.text.split("\n")[:2])
        p['handle'] = post.author.handle
        p['uri'] = post.uri.split("feed.post/")[1]
        p['likes'] = post.like_count
        p['quotes'] = post.quote_count
        p['replies'] = post.reply_count
        p['reposts'] = post.repost_count
        p['created_at'] = post.record.created_at
        p['link'] = "blank"

        try:
            p['rt_handle'] = post.embed.record.record.author.handle
            p['rt_text'] = " ".join(post.embed.record.record.value.text.split("\n")[:2])
            p['rt_uri'] = post.embed.record.record.uri.split("feed.post/")[1]
            p['rt_link'] = post.embed.record.record.value.embed.external.uri.split("?")[0]
        except:
            p['rt_handle'] = "blank"
            p['rt_text'] = "blank"
            p['rt_uri'] = "blank"
            p['rt_link'] = "blank"


    else:
        p = {}
        p['text'] = " ".join(post.record.text.split("\n")[:2])
        p['handle'] = post.author.handle
        p['uri'] = post.uri.split("feed.post/")[1]
        p['likes'] = post.like_count
        p['quotes'] = post.quote_count
        p['replies'] = post.reply_count
        p['reposts'] = post.repost_count
        p['created_at'] = post.record.created_at

        p['rt_handle'] = "blank"
        p['rt_text'] = "blank"
        p['rt_uri'] = "blank"
        p['rt_link'] = "blank"
        p['link'] = "blank"

    # ppr(p)
    # tweets.append(p)

    try:
        print('writing file')
        createSocialLink(p)
    except Exception as e:
        ppr(post.record.embed)
        print(e, "\nthis\n\n")
72
events/scrapers/Working/smedia/bluesky_media.py
Normal file
@@ -0,0 +1,72 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta

from atproto import Client

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from socials.models import SocialImg
# from digitools import getBrowser, createURL, createBasicEvent, getSource

tz = pytz.timezone("US/Central")

USERNAME = "dreamfreely.org"
PASSWORD = "Futbol21!@"

client = Client()
client.login(USERNAME, PASSWORD)
feed = client.get_author_feed(USERNAME, limit = 100)

def createSocialImg(post):
    new_post, created = SocialImg.objects.update_or_create(
        uri = post['uri'],
        text = post['text'],
        img_link = post['img_link'],
        handle = post['handle'],
        created_at = post['created_at'],
        platform = 'bluesky',
    )
    print(created, new_post)

tweets = []

print(len(feed.feed))

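# Only posts whose embed carries images are kept; the first image's full-size URL is stored
# along with the post text, author handle, and timestamp, then written via createSocialImg().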
for post in feed.feed:
    post = post.post

    # print(post, "\n\n")

    # try:
    # ppr(post.embed.images[0].fullsize)
    # # ppr(post.embed.record.record.value.text.split("\n")[:2])
    # # ppr(post.embed.record.record.value.embed.external.uri.split("?")[0])
    # # ppr(post.embed.record.record.uri.split("feed.post/")[1])
    # except Exception as e:
    # print("failed:", e)

    if hasattr(post.embed, 'images'):
        p = {}
        p['img_link'] = post.embed.images[0].fullsize
        p['text'] = " ".join(post.record.text.split("\n")[:2])
        p['handle'] = post.author.handle
        p['uri'] = post.uri.split("feed.post/")[1]
        p['created_at'] = post.record.created_at

        # ppr(p)
        tweets.append(p)

        try:
            print('writing file')
            createSocialImg(p)
        except Exception as e:
            ppr(post.embed)
            print(e, "\nthis\n\n")
72
events/scrapers/Working/smedia/redsky.py
Normal file
@@ -0,0 +1,72 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

import praw

from socials.models import SocialLink, SocialPost
# from digitools import getBrowser, createURL, createBasicEvent, getSource

tz = pytz.timezone("US/Central")


# timestamp = 1729322547223
# dt_object = datetime.datetime.fromtimestamp(timestamp)
# print(dt_object)

reddit = praw.Reddit(
    client_id="rxW3Ywqke6FZDP7pIhYYuw",
    client_secret="cg1VNl0I-RTuYUwgz16ryKh2wWKEcA",
    password="7CTu4sGFi9E0",
    user_agent="CultureClap",
    username="cultureclap",
)


def createSocialLink(post):
    new_post, created = SocialLink.objects.update_or_create(
        text = post['text'],
        link = post['link'],
        handle = post['handle'],
        likes = post['likes'],
        replies = post['replies'],
        platform = post['platform'],
        created_at = post['created_at'],
        rt_uri = 'blank',
        rt_text = 'blank',
        rt_link = 'blank',
        rt_handle = 'blank',
    )
    print(created, new_post)

count = 0

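# Mirror the authenticated account's upvoted submissions as SocialLink rows,
# stopping the whole script once 50 have been written.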
for item in reddit.user.me().upvoted():
    rdt = {}
    rdt['text'] = item.title + " | " + item.selftext
    rdt['handle'] = item.author.name
    rdt['link'] = item.url
    rdt['likes'] = item.ups
    rdt['replies'] = len(item.comments.list())
    rdt['created_at'] = datetime.fromtimestamp(item.created_utc)
    rdt['platform'] = 'reddit'

    try:
        print('writing file')
        createSocialLink(rdt)
        count += 1
        if count > 50:
            quit()
    except Exception as e:
        ppr(item)
        print(e, "\nthis\n\n")
    # ppr(item)
71
events/scrapers/Working/venues/AcmeComedy.Mpls.py
Normal file
@@ -0,0 +1,71 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

import events.digitools as digitools

from events.models import Organization, Scraper, Calendar, Event

venue, created = Organization.objects.get_or_create(
    name="Acme Comedy Club",
    city="Minneapolis",
    website="https://acmecomedycompany.com/the-club/calendar/",
    is_venue = True
)

scraper, item_count_start = digitools.getScraper(venue)

tz = pytz.timezone("US/Central")

DATETIME_FORMAT = '%b %d %Y %I:%M %p'
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'

def get_events(ps, event_type):
    contents = ps.xpath('.//*/li[@class="event"]')
    for c in contents:
        try:
            event = {}
            day = c.xpath('.//*/span[@class="day"]/text()')[0]
            month = c.xpath('.//*/span[@class="mth"]/text()')[0]
            year = datetime.now().year
            if month == "Jan":
                year = int(year) + 1
            event['scraper'] = scraper
            event['calendar'] = scraper.calendar
            event['title'] = c.xpath('.//*/span[@class="event_title"]/a/text()')[0]
            event['date'] = [month, day, str(year), c.xpath('.//*/span[@class="event_time"]/text()')[0].strip()]
            event['date'] = " ".join(event['date'])
            event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
            event['link'] = c.xpath('.//*/span[@class="event_title"]/a/@href')[0]
            digitools.createBasicEvent(event, "Co", venue)
            scraper.items += 1
        except Exception as e:
            print(e)
            ppr(event)
            print("\n\n+++\n\n")

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    quit()

links = digitools.createURL("https://acmecomedycompany.com/the-club/calendar/")

for link in links:
    ps = digitools.getSource(br, link)
    get_events(ps, "Co")

digitools.updateScraper(scraper, item_count_start)
br.close()
67
events/scrapers/Working/venues/Amsterdam.StPaul.py
Normal file
@@ -0,0 +1,67 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from events.models import Organization, Scraper, Calendar
import events.digitools as digitools

venue, created = Organization.objects.get_or_create(
    name="Amsterdam Bar & Hall",
    city="St. Paul",
    website="https://www.amsterdambarandhall.com/events-new/",
    is_venue=True
)

scraper, item_count_start = digitools.getScraper(venue)

DATETIME_FORMAT = '%B %d %Y %I:%M%p'
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'

def get_events(ps):
    contents = ps.xpath('.//*/ul[@class="events-list"]/li')
    for c in contents:
        try:
            event = {}
            day = c.xpath('.//*/div[@class="date-day"]/text()')[0]
            month = c.xpath('.//*/div[@class="date-month"]/text()')[0]
            year = datetime.now().year
            event['scraper'] = scraper
            event['calendar'] = scraper.calendar
            event['title'] = c.xpath('.//div/h4/a/text()')[0]
            event['date'] = [month, day, str(year), c.xpath('.//div[@class="event-info"]/p/text()')[0].split(" ")[0]]
            event['date'] = " ".join(event['date'])
            event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
            event['link'] = c.xpath('.//div[@class="event-info"]/h4/a/@href')[0]
            if " presents" in event['title']:
                event['title'] = event['title'].split("presents")[1][1:].strip()
            if event['title'].startswith('.'):
                print("BLAHH\n")
                event['title'] = event['title'][1:].strip()
            digitools.createBasicEvent(event, "Mu", venue)
            scraper.items += 1
        except Exception as e:
            print(e)


if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    quit()

ps = digitools.getSource(br, venue.website)
get_events(ps)
sleep(3)
br.close()

digitools.updateScraper(scraper, item_count_start)
70
events/scrapers/Working/venues/EastsideLibrary.py
Normal file
@@ -0,0 +1,70 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from events.models import Organization, Scraper
import events.digitools as digitools

current_year = str(datetime.now().year)

venue, created = Organization.objects.get_or_create(
    name="Eastside Freedom Library",
    city="Minneapolis",
    website="https://eastsidefreedomlibrary.org/events/",
    is_venue=True
)

scraper, item_count_start = digitools.getScraper(venue)

tz = pytz.timezone("US/Central")

DATETIME_FORMAT = '%B %d @ %I:%M %p %Y'

def get_events(ps):
    contents = ps.xpath('.//*/article')
    # ppr("contents:", contents)
    for c in contents:
        try:
            event = {}
            event['scraper'] = scraper
            event['calendar'] = scraper.calendar
            event['title'] = c.xpath('.//*/h3[@class="tribe-events-calendar-list__event-title tribe-common-h6 tribe-common-h4--min-medium"]/a/text()')[0].strip()
            event['link'] = c.xpath('.//*/h3[@class="tribe-events-calendar-list__event-title tribe-common-h6 tribe-common-h4--min-medium"]/a/@href')[0]
            event['date'] = c.xpath('.//*/span[@class="tribe-event-date-start"]/text()')[0].strip() + " " + current_year
            event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
            try:
                new_event = digitools.createBasicEvent(event, "Ed", venue)
                scraper.items += 1
            except Exception as e:
                print(e)
                ppr(event)
                print("\n+++\n")
        except Exception as e:
            print(e)

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    quit()

calendar_url = 'https://eastsidefreedomlibrary.org/events/'

ps = digitools.getSource(br, calendar_url)

get_events(ps)

# ppr(events)
br.close()

digitools.updateScraper(scraper, item_count_start)
178
events/scrapers/Working/venues/FirstAveScrape.py
Normal file
@@ -0,0 +1,178 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr

from lxml import html
import pytz

from events.models import Organization, Scraper, Event
import events.digitools as digitools

venue, created = Organization.objects.get_or_create(
    name="First Avenue",
    city="Minneapolis",
    website="https://first-avenue.com",
    is_venue = True
)

scraper, item_count_start = digitools.getScraper(venue)

tz = pytz.timezone("US/Central")

DATETIME_FORMAT = '%b %d %Y %I%p'
DATETIME_FORMAT_2 = '%b %d %Y %I:%M%p'
DATETIME_FORMAT_3 = '%b %d %Y'

# Set initial variables for City, etc
month = int(datetime.now().month)
day = int(datetime.now().day)

if month == 12:
    next_month = "01"
else:
    next_month = month + 1
    if next_month < 10:
        next_month = "0" + str(next_month)

if month < 10:
    month = "0" + str(month)

year = int(datetime.now().year)

calendar_url = 'https://first-avenue.com/shows/?start_date=' + str(year) + str(month) + str(day)

next_month_string = str(next_month) + "01"

if next_month == 1:
    calendar_url_2 = 'https://first-avenue.com/shows/?start_date=' + str(year + 1) + next_month_string
else:
    if int(next_month) == 1:
        calendar_url_2 = 'https://first-avenue.com/shows/?start_date=' + str(year + 1) + next_month_string
    else:
        calendar_url_2 = 'https://first-avenue.com/shows/?start_date=' + str(year) + next_month_string


print("\n\n", calendar_url, calendar_url_2, "\n\n")

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    quit()


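# The First Avenue calendar is served one month at a time, so how much gets scraped depends
# on today's date: early in the month only the current page is pulled, while later in the
# month part of the following month's page is appended as well.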
if datetime.now().day < 8:
    ps = digitools.getSource(br, calendar_url)
    shows = ps.xpath('.//*/div[@class="show_name content flex-fill"]/div/div/h4/a/@href')[:63]
elif 7 < datetime.now().day < 15:
    ps = digitools.getSource(br, calendar_url)
    shows = ps.xpath('.//*/div[@class="show_name content flex-fill"]/div/div/h4/a/@href')
elif 14 < datetime.now().day < 21:
    ps = digitools.getSource(br, calendar_url)
    shows = ps.xpath('.//*/div[@class="show_name content flex-fill"]/div/div/h4/a/@href')[:95]
    ps = digitools.getSource(br, calendar_url_2)
    shows = shows + ps.xpath('.//*/div[@class="show_name content flex-fill"]/div/div/h4/a/@href')[:31]
else:
    ps = digitools.getSource(br, calendar_url)
    shows = ps.xpath('.//*/div[@class="show_name content flex-fill"]/div/div/h4/a/@href')

    ps = digitools.getSource(br, calendar_url_2)
    shows = shows + ps.xpath('.//*/div[@class="show_name content flex-fill"]/div/div/h4/a/@href')[:63]

events = []

def get_info(pse):
    event = {}
    event['scraper'] = scraper
    event['calendar'] = scraper.calendar
    event["venue"] = pse.xpath('.//*/div[@class="content"]/div/div[@class="venue_name"]/text()')[0].replace('\t', '').replace('\n', '').strip()
    event["show_title"] = pse.xpath('.//*/span[@class="show_title"]/text()')[0].replace('\t', '').replace('\n', '')
    if event["show_title"] == "":
        event["show_title"] = pse.xpath('.//*/span[@class="show_title"]/text()')[2].replace('\t', '').replace('\n', '')
    event["guests"] = pse.xpath('.//*/div[@class="feature_details_main d-flex align-items-center"]/div/h4/text()')
    event["flyer"] = pse.xpath('.//*/img[@class="gig_poster lazy loaded"]/@src')
    try:
        event = get_date(pse, event)
    except Exception as e:
        print("date issue: ", e)
    try:
        event = get_details(pse, event)
    except Exception as e:
        print("details issue: ", e)
    try:
        event["date_time"] = datetime.strptime(" ".join(event["date"]) + " " + event["details"]["Doors Open"], DATETIME_FORMAT)
    except Exception as e:
        print("Using alt date format 2: ", e)
        try:
            event["date_time"] = datetime.strptime(" ".join(event["date"]) + " " + event["details"]["Doors Open"], DATETIME_FORMAT_2)
            ppr(event)
        except Exception as e:
            print("Using alt date format 3: ", e)
            print(event['date'])
            event["date_time"] = datetime.strptime(" ".join(event["date"]), DATETIME_FORMAT_3)
    return event

def get_date(pse, event):
    month = pse.xpath('.//*/div[@class="date_container"]/div/div[@class="month"]/text()')[0].replace('\t', '').replace('\n', '')
    day = pse.xpath('.//*/div[@class="date_container"]/div/div[@class="day"]/text()')[0].replace('\t', '').replace('\n', '')
    year = pse.xpath('.//*/div[@class="date_container"]/div/div[@class="year"]/text()')[0].replace('\t', '').replace('\n', '')
    event["date"] = [month, day, year]
    return event

def get_details(pse, event):
    try:
        details = pse.xpath('.//*/div[@class="show_details text-center"]/div/div/h6/text()')
        info = pse.xpath('.//*/div[@class="show_details text-center"]/div/div/h2/text()')
        di = zip(details, info)
        details = {}
        for d,i in di:
            details[d] = i
        event["details"] = details
        return event
    except Exception as e:
        print("details issue: ", e)

for show in shows:
    br.get(show)
    sleep(2)
    try:
        pse = html.fromstring(br.page_source)
    except Exception as e:
        print(show)
        pass
    try:
        event = get_info(pse)
    except Exception as e:
        print("get_info error: ", e)
    try:
        event["link"] = show
        if event["venue"] in ["Palace Theater", "Turf Club", "The Fitzgerald Theater", "Amsterdam Bar & Hall"]:
            venue, created = Organization.objects.get_or_create(name=event["venue"], is_venue=True, city="St. Paul")
        else:
            venue, created = Organization.objects.get_or_create(name=event["venue"], is_venue=True, city="Minneapolis")
    except Exception as e:
        print("Venue creation error: ", e, "\n", event, "\n", event["venue"])
    try:
        event['dateStamp'] = event['date_time']
        event['scraper'] = scraper
        new_event, created = digitools.createDetailedEvent(event, "Mu", venue)
        scraper.items += 1
    except Exception as e:
        print("event creation error: ", e, "\n\n", event, "\n\n", created)
        quit()

ppr(events)
br.close()

digitools.updateScraper(scraper, item_count_start)

# br.find_element_by_class_name('fc-btn_allCalendars-button').click()
68
events/scrapers/Working/venues/GinkgoCoffee.stp.py
Normal file
@@ -0,0 +1,68 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from events.models import Organization, Scraper
import events.digitools as digitools

venue, created = Organization.objects.get_or_create(
    name="Ginkgo Coffee",
    city="Saint Paul",
    website="https://ginkgocoffee.com/events/",
    is_venue = True
)

scraper, item_count_start = digitools.getScraper(venue)

event_type = ""

# Time Signatures
tz = pytz.timezone("US/Central")
DATETIME_FORMAT_2 = '%b %d %Y %I:%M %p'
DATETIME_FORMAT = '%B %d @ %I:%M %p %Y'

def get_events(ps, event_type):
    contents = ps.xpath('.//*/article')
    for c in contents:
        try:
            event = {}
            dateTime = c.xpath('.//*/span[@class="tribe-event-date-start"]/text()')[0]
            month = c.xpath('.//*/span[@class="tribe-event-date-start"]/text()')[0].split(' ')[0]
            year = datetime.now().year
            if month == "January":
                year = int(year) + 1
            event['scraper'] = scraper
            event['calendar'] = scraper.calendar
            event['title'] = c.xpath('.//*/h3/a/text()')[0].replace("\n", "").replace("\t", "")
            event['date'] = " ".join([ dateTime, str(year)])
            event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
            event['link'] = c.xpath('.//*/h3/a/@href')[0]
            digitools.createBasicEvent(event, event_type, venue)
        except Exception as e:
            print(e)
            ppr(event)
            print("\n\n+++\n\n")

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    quit()

ps = digitools.getSource(br, venue.website)
get_events(ps, "Mu")
sleep(3)

br.close()

digitools.updateScraper(scraper, item_count_start)
71
events/scrapers/Working/venues/GreenRoom.Mpls.py
Normal file
@@ -0,0 +1,71 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from events.models import Organization, Scraper
import events.digitools as digitools


venue, created = Organization.objects.get_or_create(
    name="Green Room",
    city="Minneapolis",
    website="https://www.greenroommn.com/events",
    is_venue = True
)

scraper, item_count_start = digitools.getScraper(venue)

event_type = "Mu"

# Time Signatures
tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%a %b %d %Y %I:%M %p'
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'

def get_events(ps, event_type):
    contents = ps.xpath('.//*/div[@class="vp-event-card vp-venue-greenroom vp-col"]')
    for c in contents:
        try:
            event = {}
            time = c.xpath('.//*/span[@class="vp-time"]/text()')[0].strip()
            date = c.xpath('.//*/span[@class="vp-date"]/text()')[0].strip()
            month = date.split(" ")[1]
            year = datetime.now().year
            # if month == "Jan":
            #     year = int(year) + 1
            event['scraper'] = scraper
            event['calendar'] = scraper.calendar
            event['title'] = c.xpath('.//*/div[@class="vp-event-name"]/text()')[0]
            event['datetime'] = date + " " + str(year) + " " + time
            event['dateStamp'] = datetime.strptime(event['datetime'], DATETIME_FORMAT)
            event['link'] = venue.website + c.xpath('.//a[@class="vp-event-link"]/@href')[0]
            digitools.createBasicEvent(event, event_type, venue)
            scraper.items += 1
        except Exception as e:
            print(e)
            ppr(event)
            print("\n+++\n")

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    quit()

ps = digitools.getSource(br, venue.website)
get_events(ps, event_type)
sleep(3)

br.close()

digitools.updateScraper(scraper, item_count_start)
96
events/scrapers/Working/venues/HookLadderScrape.py
Normal file
@@ -0,0 +1,96 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from events.models import Organization, Scraper, Event
import events.digitools as digitools


from lxml import html

count = 0

venue, created = Organization.objects.get_or_create(
    name="Hook & Ladder",
    city="Minneapolis",
    website="https://thehookmpls.com",
    is_venue=True,
)

scraper, item_count_start = digitools.getScraper(venue)
ppr(scraper)

tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%a, %b %d, %Y @ %I:%M %p'

# Set initial variables for City, etc
calendar_url = [
    "https://thehookmpls.com/events/list/page/1",
    "https://thehookmpls.com/events/list/page/2",
    "https://thehookmpls.com/events/list/page/3"
]

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    quit()


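# Parse each Tribe Events list page into flat dicts: listings without a subtitle are skipped,
# and a missing ticket price falls back to "See Link".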
def get_listings(pse, events):
    nevents = pse.xpath('.//*/article')
    for event in nevents:
        e = {}
        e['datetime'] = event.xpath('.//*/span[@class="tribe-event-date-start"]/text()')[0]
        e['show_title'] = event.xpath('.//*/header/h2/a/@title')[0]
        e['link'] = event.xpath('.//*/header/h2/a/@href')[0]
        try:
            e['subtitle'] = event.xpath('.//*/header/div[@class="eventSubHead"]/text()')[0]
        except:
            continue
        try:
            e['price'] = event.xpath('.//*/span[@class="tribe-events-c-small-cta__price"]/strong/text()')[0].replace("Tickets ", "")
        except:
            e['price'] = "See Link"
        e['image'] = event.xpath('.//*/img/@data-src')[0]
        e["date_time"] = datetime.strptime(e['datetime'], DATETIME_FORMAT)
        e['scraper'] = scraper
        e['calendar'] = scraper.calendar
        events.append(e)

events = []

for cal in calendar_url:
    br.get(cal)
    sleep(3)
    pse = html.fromstring(br.page_source)
    get_listings(pse, events)

for event in events:
    try:
        new_event = Event.objects.update_or_create(
            calendar = event['calendar'],
            scraper = event['scraper'],
            event_type = 'Mu',
            show_title = event["show_title"],
            show_link = event["link"],
            show_date = event["date_time"],
            show_day = event["date_time"],
            guests = event["subtitle"],
            venue = venue
        )
    except Exception as e:
        print("oops ", e, "\n\n", "Scraper:", scraper)

br.close()
digitools.updateScraper(scraper, item_count_start)
70
events/scrapers/Working/venues/MagersQuinn.py
Normal file
@@ -0,0 +1,70 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from events.models import Organization, Scraper
import events.digitools as digitools

venue, created = Organization.objects.get_or_create(
    name="Magers & Quinn",
    city="Minneapolis",
    website="https://www.magersandquinn.com/events",
    is_venue=False
)

scraper, item_count_start = digitools.getScraper(venue)

DATETIME_FORMAT = '%A, %B %d , %Y %I:%M %p'
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'

def get_events(ps, event_type):
    contents = ps.xpath('.//*/div[@class="day has-event"]')
    for c in contents:
        try:
            event = {}
            day = c.xpath('.//*/div[@class="dd"]/text()')[0]
            month = c.xpath('.//*/div[@class="month"]/text()')[0]
            year = c.xpath('.//*/div[@class="year"]/text()')[0]
            event['scraper'] = scraper
            event['calendar'] = scraper.calendar
            event['title'] = c.xpath('.//*/h3/text()')[0]
            event['date'] = [month, day, year, c.xpath('.//*/p[@class="time"]/text()')[0]]
            event['date'] = " ".join(event['date'])
            event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
            event['link'] = "https://www.magersandquinn.com" + c.xpath('.//a[@class="event in-store"]/@href')[0]
            digitools.createBasicEvent(event, "Ed", venue)
            scraper.items += 1
        except Exception as e:
            event['link'] = "https://www.magersandquinn.com" + c.xpath('.//a[@class="event off-site"]/@href')[0]
            print(e)
            ppr(event)
            digitools.createBasicEvent(event, "Ed", venue)
            print("\n\n+++\n\n")


links = digitools.createBasicURL("https://www.magersandquinn.com/events/")

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    quit()

for link in links:
    ps = digitools.getSource(br, link)
    get_events(ps, "Ed")
    sleep(3)
# ppr(events)
br.close()

digitools.updateScraper(scraper, item_count_start)
80
events/scrapers/Working/venues/MplsVFW.py
Normal file
@@ -0,0 +1,80 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from events.models import Organization, Scraper, Event
import events.digitools as digitools


from selenium.webdriver.common.by import By
from lxml import html

venue, created = Organization.objects.get_or_create(
    name="Uptown VFW",
    city="Minneapolis",
    website="https://noboolpresents.com/venues/uptown-vfw/",
    is_venue = True
)

scraper, item_count_start = digitools.getScraper(venue)

tz = pytz.timezone("US/Central")

DATETIME_FORMAT = '%a %B %d @ %I:%M %p %Y'
DATETIME_FORMAT_2 = '%b %d %I:%M%p %Y'
DATETIME_FORMAT_3 = '%b %d %Y'
# Set initial variables for City, etc
calendar_url = 'https://noboolpresents.com/venues/uptown-vfw/'
current_year = str(datetime.now().year)

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    quit()

br.get(calendar_url)
sleep(30)

def getEvents(br):
    ps = html.fromstring(br.page_source)
    events = ps.xpath('.//*/article')
    for event in events:
        deets = {}
        dateTime = event.xpath('.//*/span[@class="tribe-event-date-start"]/text()')[0].replace("•", "").strip() + " " + current_year
        title = event.xpath('.//*/h2[@class="alt-font"]/a/text()')[0].replace("\n", "").replace("\t", "")
        link = event.xpath('.//*/h2[@class="alt-font"]/a/@href')[0]
        deets["tickets"] = event.xpath('.//*/span[@class="tribe-events-c-small-cta__price"]/strong/text()')[0]
        try:
            new_event = Event.objects.update_or_create(
                calendar = scraper.calendar,
                scraper = scraper,
                event_type = 'Mu',
                show_title = title,
                show_link = link,
                show_date = datetime.strptime(dateTime, DATETIME_FORMAT),
                show_day = datetime.strptime(dateTime, DATETIME_FORMAT).date(),
                more_details = deets["tickets"],
                venue = venue
            )
            scraper.items += 1
        except Exception as e:
            print("oops", e)

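# Scrape the first results page, click the Tribe Events "next" arrow once, then scrape
# the second page before closing the browser.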
getEvents(br)
br.find_element(By.XPATH, './/*/li[@class="tribe-events-c-nav__list-item tribe-events-c-nav__list-item--next"]/a').click()
sleep(5)
getEvents(br)
br.close()

digitools.updateScraper(scraper, item_count_start)
106
events/scrapers/Working/venues/ParkwayTheater.py
Normal file
@@ -0,0 +1,106 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from events.models import Organization, Scraper, Event as DSEvent
import events.digitools as digitools


try:
    venue, created = Organization.objects.get_or_create(
        name="Parkway Theater",
        city="Minneapolis",
        website="https://theparkwaytheater.com",
        is_venue = True
    )
except Exception as e:
    venue = Organization.objects.get(name="Parkway Theater")

scraper, item_count_start = digitools.getScraper(venue)

tz = pytz.timezone("US/Central")

DATETIME_FORMAT = '%b %d, %Y %I:%M %p'

def get_events(ps, event_type):
    contents = ps.xpath('.//*/div[@class="summary-content sqs-gallery-meta-container"]')
    img_etc = ps.xpath('.//*/div[@class="summary-thumbnail-outer-container"]/a/div/img/@src')
    ps.xpath('.//*/span[@class="event-time-12hr"]/text()')
    for c,i in zip(contents,img_etc):
        try:
            event = {}
            event['calendar'] = scraper.calendar
            event['title'] = c.xpath('.//*/a[@class="summary-title-link"]/text()')[0]
            event['link'] = "https://theparkwaytheater.com" + c.xpath('.//*/a[@class="summary-title-link"]/@href')[0]
            event['date'] = c.xpath('.//div/div/time/text()')[0] + " " + c.xpath('.//*/span[@class="event-time-12hr"]/text()')[0].split("–")[0].strip()
            event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
            event['desc'] = c.xpath('.//*/p/text()')[0]
            event['img_link'] = i
            event['details'] = {
                'description': event['desc'],
                'img_link': event['img_link'],
            }

            try:
                new_event = DSEvent.objects.update_or_create(
                    calendar = scraper.calendar,
                    scraper = scraper,
                    event_type = event_type,
                    show_title = event['title'],
                    show_link = event['link'],
                    show_date = datetime.strptime(event['date'], DATETIME_FORMAT),
                    show_day = datetime.strptime(event['date'], DATETIME_FORMAT),
                    more_details = event["details"],
                    venue = venue
                )
                scraper.items += 1
            except Exception as e:
                try:
                    event['date'] = c.xpath('.//div/div/time/text()')[0].split("–")[0] + " " + c.xpath('.//*/span[@class="event-time-12hr"]/text()')[0].split("–")[0].strip()
                    event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
                    new_event = DSEvent.objects.update_or_create(
                        calendar = scraper.calendar,
                        scraper = scraper,
                        event_type = event_type,
                        show_title = event['title'],
                        show_link = event['link'],
                        show_date = datetime.strptime(event['date'], DATETIME_FORMAT),
                        show_day = datetime.strptime(event['date'], DATETIME_FORMAT),
                        more_details = event["details"],
                        venue = venue
                    )
                    scraper.items += 1
                except Exception as e:
                    print(e)
                    print("\n\n+++\n\n")
        except Exception as e:
            continue

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    quit()

calendar_url = 'https://theparkwaytheater.com/live-events'
ps = digitools.getSource(br, calendar_url)
get_events(ps, "Mu")

calendar_url = "https://theparkwaytheater.com/movies"
ps = digitools.getSource(br, calendar_url)
get_events(ps, "Th")

# ppr(events)
br.close()

digitools.updateScraper(scraper, item_count_start)
98
events/scrapers/Working/venues/SPCO.stp.py
Normal file
@@ -0,0 +1,98 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from events.models import Organization, Scraper

import events.digitools as digitools

venue, created = Organization.objects.get_or_create(
    name="St Paul Chamber Orchestra",
    city="St Paul",
    website="https://thespco.org",
    is_venue = False
)

scraper, item_count_start = digitools.getScraper(venue)

# Time Signatures
tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%A, %B %d, %Y – %I:%M %p'

def get_events(ps, event_type):
    contents = ps.xpath('.//*/div[@class="event-title"]/a/@href')
    for c in set(contents):
        try:
            link = 'https://content.thespco.org' + c
            ps = digitools.getSource(br, link)
            ntitle = ps.xpath('.//*/article/h1/text()')
            subtitle = ps.xpath('.//*/article/h1/em/text()')
            event = {}
            event['scraper'] = scraper
            event['calendar'] = scraper.calendar
            if len(subtitle) == 1:
                if len(ntitle) == 2:
                    title = ntitle[0] + subtitle[0] + ntitle[1]
                elif ntitle[0].startswith(" "):
                    title = subtitle[0] + ntitle[0]
                else:
                    title = ntitle[0] + subtitle[0]
            else:
                title = ntitle[0]

            events = ps.xpath('.//*/div[@class="day"]')
            for e in events:
                new_venue = e.xpath('.//*/strong[@class="venue"]/text()')[0].strip()
                location = e.xpath('.//*/span[@class="location"]/text()')[0].strip()
                if 'Minneapolis' in location:
                    location = 'Minneapolis'
                elif 'St. Paul' in location:
                    location = 'St. Paul'
                else:
                    location = location

                venue, created = Organization.objects.get_or_create(
                    name=new_venue,
                    city=location,
                    is_venue = True
                )

                dateTime = e.xpath('.//*/h3[@class="date"]/text()')[0].replace("\n", "").replace("\t", "").strip()
                event['dateStamp'] = datetime.strptime(dateTime, DATETIME_FORMAT)
                event['venue'] = venue
                event['location'] = location
                event['title'] = "SPCO: " + title
                event['link'] = link
                event_type = "Mu"
                digitools.createBasicEvent(event, event_type, venue)
                scraper.items += 1
        except Exception as e:
            print("ERROR: ", e)
            print("\n\n+++\n\n")

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    quit()

# Get Event Page Link(s)
links = digitools.createURLNoZero("https://content.thespco.org/events/calendar/")

for link in links:
    ps = digitools.getSource(br, link)
    get_events(ps, "Mu")
    sleep(3)

br.close()
digitools.updateScraper(scraper, item_count_start)
71
events/scrapers/Working/venues/WhiteSquirrelScrape.py
Normal file
@@ -0,0 +1,71 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from events.models import Organization, Scraper
import events.digitools as digitools

tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%Y-%m-%d %I:%M %p'

venue, created = Organization.objects.get_or_create(
    name="White Squirrel",
    city="St. Paul",
    website="https://whitesquirrelbar.com",
    is_venue = True
)

scraper, item_count_start = digitools.getScraper(venue)


# Set initial variables for City, etc
calendar_url = [
    'https://whitesquirrelbar.com/calendar/list/page/1/',
    'https://whitesquirrelbar.com/calendar/list/page/2/',
    'https://whitesquirrelbar.com/calendar/list/page/3/'
]

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    quit()

def get_listings(pse, events):
    listings = pse.xpath('.//*/div[@class="tribe-common-g-row tribe-events-calendar-list__event-row"]')
    for l in listings:
        event = {}
        event['scraper'] = scraper
        event['calendar'] = scraper.calendar
        try:
            event["image"] = l.xpath('.//*/img/@src')[0]
        except:
            event["image"] = "none"
        event["date"] = l.xpath('.//time/@datetime')[0]
        event["time"] = l.xpath('.//*/span[@class="tribe-event-date-start"]/text()')[0].split("@")[1]
        event["title"] = l.xpath('.//*/h3/a/text()')[0].replace("\t", "").replace("\n", "")
        event["link"] = l.xpath('.//*/h3/a/@href')[0]
        event['datetime'] = event['date'] + " " + event['time']
        event["dateStamp"] = datetime.strptime(event['datetime'] , DATETIME_FORMAT)
        events.append(event)
        digitools.createBasicEvent(event, "Mu", venue)
        scraper.items += 1

events = []

for cal in calendar_url:
    ps = digitools.getSource(br, cal)
    get_listings(ps, events)

br.close()
digitools.updateScraper(scraper, item_count_start)
74
events/scrapers/Working/venues/cedar.mpls.py
Normal file
@@ -0,0 +1,74 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from events.models import Organization, Scraper
import events.digitools as digitools

venue, created = Organization.objects.get_or_create(
    name="Cedar Cultural Center",
    city="Minneapolis",
    website="https://www.thecedar.org",
    is_venue=True
)

scraper, item_count_start = digitools.getScraper(venue)

tz = pytz.timezone("US/Central")

DATETIME_FORMAT = '%A, %B %d, %Y %I:%M %p'
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'
DATETIME_FORMAT_3 = '%A, %B %d at %I:%M%p %Y'
DATETIME_FORMAT_4 = '%A, %B %d at %I%p %Y'
DATETIME_FORMAT_5 = '%A, %B %d @%I%p %Y'

def get_events(ps):
    links = ps.xpath('.//*/div[@class="summary-title"]/a/@href')
    for l in links:
        if "cedar-news-blog" in l:
            continue
        pse = digitools.getSource(br, "https://www.thecedar.org" + l)
        event = {}
        event['scraper'] = scraper
        event['calendar'] = scraper.calendar
        event['link'] = "https://www.thecedar.org" + l
        try:
            time = pse.xpath('.//*/time[@class="event-time-localized-start"]/text()')[0]
            date = pse.xpath('.//*/time[@class="event-date"]/text()')[0]
            event['title'] = pse.xpath('.//*/h1[@class="eventitem-title"]/text()')[0]
        except:
            try:
                time = pse.xpath('.//*/time[@class="event-time-localized"]/text()')[0]
                date = pse.xpath('.//*/time[@class="event-date"]/text()')[0]
                event['title'] = pse.xpath('.//*/h1[@class="eventitem-title"]/text()')[0]
            except Exception as e:
                print(e)
                print("failed event: ", event)
        dateStamp = date + " " + time
        event['dateStamp'] = datetime.strptime(dateStamp, DATETIME_FORMAT)
        digitools.createBasicEvent(event, "Mu", venue)
        scraper.items += 1

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    quit()


calendar_url = 'https://www.thecedar.org/listing'
ps = digitools.getSource(br, calendar_url)
get_events(ps)
br.close()

digitools.updateScraper(scraper, item_count_start)
105
events/scrapers/Working/venues/club331Scrape.py
Normal file
@@ -0,0 +1,105 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from selenium.webdriver.common.by import By
from lxml import html

from events.models import Organization, Scraper, Event
import events.digitools as digitools

venue, created = Organization.objects.get_or_create(
    name="Club 331",
    city="Minneapolis",
    website="https://331club.com",
    is_venue=True,
)

scraper, item_count_start = digitools.getScraper(venue)

tz = pytz.timezone("US/Central")

DATETIME_FORMAT = '%b %d %I%p %Y'
DATETIME_FORMAT_2 = '%b %d %I:%M%p %Y'
DATETIME_FORMAT_3 = '%b %d %Y'
# Set initial variables for City, etc
calendar_url = 'https://331club.com/#calendar'
current_year = str(datetime.now().year)

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    quit()

br.get(calendar_url)
sleep(3)

br.find_element(By.CLASS_NAME, 'more_events').click()
sleep(2)
ps = html.fromstring(br.page_source)
sleep(3)

dates = ps.xpath('.//*/div[@class="event"]')
dates = dates + ps.xpath('.//*/div[@class="event hidden"]')

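# Door times on the 331 calendar are free text (e.g. "10pm-1am", "TBA"); keep only entries
# that end in "pm" and trim ranges down to their start time.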
def process_times(times):
    # print("Times: ", times)
    time = []
    for t in times:
        t = t.replace("\n", "").replace("TBA", "")
        if len(t) > 0 and t.endswith("pm"):
            if "-" in t:
                t = t.split("-")[0] + "pm"
            time.append(t)
    return time

events = []

for d in dates:
    event_date = d.xpath('.//div[@class="event-date"]/span/text()')[:2]
    cols = d.xpath('.//div[@class="column"]')
    for c in cols:
        bands = c.xpath('.//p/a/text()')
        links = c.xpath('.//p/a/@href')
        time = process_times(c.xpath('.//p/text()'))
        event = {}
        event["datetime"] = event_date + time + [current_year]
        try:
            event["date_time"] = datetime.strptime(" ".join(event["datetime"]), DATETIME_FORMAT)
        except:
            try:
                event["date_time"] = datetime.strptime(" ".join(event["datetime"]), DATETIME_FORMAT_2)
            except:
                try:
                    event["date_time"] = datetime.strptime(" ".join(event["datetime"]), DATETIME_FORMAT_3)
                except:
                    event["date_time"] = "Invalid"
        event["bands"] = (", ").join(bands)
        if len(bands) > 0:
            event['scraper'] = scraper
            event['calendar'] = scraper.calendar
            event['title'] = event["bands"]
            event['date'] = event["date_time"]
            event['dateStamp'] = event["date_time"]
            event['link'] = "https://331club.com/#calendar"

            try:
                digitools.createBasicEvent(event, "Mu", venue)
            except Exception as e:
                print('oops', e)
            events.append(event)

br.close()

digitools.updateScraper(scraper, item_count_start)
64
events/scrapers/Working/workshop/ComedyUnderground.py
Normal file
@@ -0,0 +1,64 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta

import django
sys.path.append('../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr

from events.models import Event as DSEvent, Organization
from digitools import getBrowser, createDashURL, createBasicEvent, getSource

venue, created = Organization.objects.get_or_create(
    name="Comedy Corner",
    city="Minneapolis",
    website="https://comedycornerunderground.com/calendar",
)

calendar_url = "https://comedycornerunderground.com/calendar"

DATETIME_FORMAT = '%m %d %I%p %Y'

def get_events(ps, link):
    contents = ps.xpath('.//*/td')
    for c in contents:
        try:
            day_num = c.xpath('.//*/div[@class="marker-daynum"]/text()')[0]
            events = c.xpath('.//*/li')
            # print(events)
            for e in events:
                event = {}
                print(link)
                month = link.split("month=")[1].split("-")[0]
                year = link.split("month=")[1].split("-")[1]
                event['title'] = e.xpath('.//*/span[@class="item-title"]/text()')[0]
                event['time'] = e.xpath('.//*/span[@class="item-time item-time--12hr"]/text()')[0].replace("\xa0", "")
                event['link'] = "https://comedycornerunderground.com" + e.xpath('.//a/@href')[0]
                event['date'] = str(day_num) + ' ' + event['time'] + 'm'
                dateStamp = month + ' ' + event['date'] + ' ' + year
                event['dateStamp'] = datetime.strptime(dateStamp, DATETIME_FORMAT)
                createBasicEvent(event, 'Co')
                print('\n\n++\n\n')
        except Exception as e:
            continue

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = getBrowser(arg1)
else:
    print("No run_env")
    quit()

links = createDashURL("https://comedycornerunderground.com/calendar?view=calendar&month=")

for link in links:
    ps = getSource(br, link)
    get_events(ps, link)
    sleep(5)

# ppr(events)
br.close()
74
events/scrapers/Working/workshop/cabooze.mn.py
Normal file
@@ -0,0 +1,74 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from events.models import Organization, Scraper
import events.digitools as digitools

venue, created = Organization.objects.get_or_create(
    name="Cabooze",
    city="Minneapolis",
    website="https://www.cabooze.com/#/events",
    is_venue=True
)

scraper, item_count_start = digitools.getScraper(venue)

event_type = ""

# Time Signatures
tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%b %d %I:%M %p %Y'
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'

def get_events(ps, event_type):
    print("Getting events ...")
    contents = ps.xpath('.//*/div[@class="vp-event-row vp-widget-reset vp-venue-thecabooze"]')
    ppr(contents)
    for c in contents:
        try:
            event = {}
            date = c.xpath('.//*/span[@class="vp-month-n-day"]/text()')[0]
            print(date)
            month = date.split(" ")[0]
            time = c.xpath('.//*/span[@class="vp-time"]/text()')[0]
            year = datetime.now().year
            if month == "Jan":
                year = int(year) + 1
            event['calendar'] = scraper.calendar
            event['title'] = c.xpath('.//*/div[@class="vp-event-name"]/text()')[0]
            event['date'] = [date, time, str(year)]
            event['date'] = " ".join(event['date'])
            event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
            event['link'] = "https://www.cabooze.com/" + c.xpath('.//a[@class="vp-event-link"]/@href')[0]
            print("Event Dict Created")
            ppr(event)
            digitools.createBasicEvent(event, event_type, venue)
            scraper.items += 1
        except Exception as e:
            print(e)
            ppr(event)
            print("\n\n+++\n\n")

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    quit()

ps = digitools.getSource(br, venue.website)
get_events(ps, "Mu")
sleep(3)

digitools.updateScraper(scraper, item_count_start)
br.close()
82
events/scrapers/Working/workshop/dakota.mpls.py
Normal file
@@ -0,0 +1,82 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from events.models import Organization, Scraper
import events.digitools as digitools

count = 0

venue, created = Organization.objects.get_or_create(
    name="Dakota",
    city="Minneapolis",
    website="https://www.dakotacooks.com/events/",
    is_venue=True
)

scraper, item_count_start = digitools.getScraper(venue)

event_type = ""

# Time Signatures
tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%a %b %d, %Y • %I:%M%p'
DATETIME_FORMAT_2 = '%a %b %d, %Y • %I%p'

def get_events(ps, event_type):
    links = ps.xpath('.//*/div[@class="wicked-event-title tooltipstered"]/a/@href')
    links = set(links)
    for l in links:
        pse = digitools.getSource(br, l)
        event = {}
        event['scraper'] = scraper
        event['calendar'] = scraper.calendar
        event['link'] = l
        try:
            event['time'] = pse.xpath('.//*/span[@class="text-uppercase"]/text()')[0].strip()
            event['dateStamp'] = datetime.strptime(event['time'], DATETIME_FORMAT)
            event['title'] = pse.xpath('.//*/div[@class="sidebar-group"]/h1/text()')[0]
            # event['detail-headers'] = pse.xpath('.//*/ul[@class="eo-event-meta"]/li/strong/text()')
            # event['details'] = pse.xpath('.//*/ul[@class="eo-event-meta"]/li/text()')

        except:
            try:
                event['title'] = pse.xpath('.//*/div[@class="sidebar-group"]/h1/text()')[0]
                event['dateStamp'] = datetime.strptime(event['time'], DATETIME_FORMAT_2)
                event['time'] = pse.xpath('.//*/span[@class="text-uppercase"]/text()')[0]
            except Exception as e:
                print(e)
                print("failed event: ", event)
        ppr(event)
        try:
            digitools.createBasicEvent(event, "Mu", venue)
            scraper.items += 1
        except Exception as e:
            print(e)
            print('failed to create: ', event)

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    quit()


ps = digitools.getSource(br, venue.website + '?wicked_month=04&wicked_year=2025&wicked_view=month')
get_events(ps, "Mu")
sleep(1)


digitools.updateScraper(scraper, item_count_start)

br.close()
78
events/scrapers/Working/workshop/icehouse.mpls.py
Normal file
@@ -0,0 +1,78 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from events.models import Organization, Scraper
import events.digitools as digitools

count = 0
tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%a, %b %d %Y %I:%M %p SHOW'
DATETIME_FORMAT_2 = '%a, %b %d %Y %I:%M %p SHOW'

venue, created = Organization.objects.get_or_create(
    name="Icehouse",
    city="Minneapolis",
    website="https://icehouse.turntabletickets.com",
    is_venue=True
)

scraper, item_count_start = digitools.getScraper(venue)

def get_events(ps, event_type):
    contents = ps.xpath('.//*/div[@class="performances whitespace-pre-line w-full md:w-3/4"]')
    for c in contents:
        try:
            event = {}
            event['scraper'] = scraper
            event['calendar'] = scraper.calendar
            event['title'] = c.xpath('.//*/h3[@class="text-3xl font-semibold font-heading mr-auto"]/text()')[0]
            event['link'] = venue.website + c.xpath('.//*/a[@class="show-link"]/@href')[0]
            event['date'] = c.xpath('.//*/h4[@class="day-of-week"]/text()')[0]
            year = datetime.now().year
            if "Brunch" in event['title']:
                event['time'] = "11:00 AM SHOW"
            else:
                event['time'] = c.xpath('.//*/div[@class="performance-btn"]/button/text()')[0]

            event['datetime'] = event['date'] + " " + str(year) + " " + event['time']
            try:
                event['dateStamp'] = datetime.strptime(event['datetime'], DATETIME_FORMAT)
            except:
                event['datetime'] = event['date'] + " " + str(year) + " " + "07:00 PM SHOW"
                event['dateStamp'] = datetime.strptime(event['datetime'], DATETIME_FORMAT)
                event['title'] = event['title'] + " (Time Estimated)"
            try:
                digitools.createBasicEvent(event, event_type, venue)
                scraper.items += 1
            except Exception as e:
                print(e)
                quit()

        except Exception as e:
            ppr(event)
            print(e)
            quit()

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    quit()

ps = digitools.getSource(br, venue.website)
get_events(ps, "Mu")

# ppr(events)
br.close()
digitools.updateScraper(scraper, item_count_start)
86
events/scrapers/Working/workshop/pillarforum.mpls.py
Normal file
@@ -0,0 +1,86 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from events.models import Organization, Scraper
import events.digitools as digitools

current_year = str(datetime.now().year)

venue, created = Organization.objects.get_or_create(
    name="Piller Forum",
    city="Minneapolis",
    website="https://www.pilllar.com/pages/events",
    is_venue=True
)

scraper, item_count_start = digitools.getScraper(venue)

event_type = "Mu"

# Time Signatures
tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%b. %d %Y %I:%M %p'
DATETIME_FORMAT_night = '%b. %d %Y %I:%M %p'
DATETIME_FORMAT_2 = '%b. %d %Y %I:%Mam'

def get_events(ps, event_type):
    contents = ps.xpath('.//*/div[@class="sse-column sse-half sse-center"]')
    for c in contents:
        try:
            event = {}
            event['scraper'] = scraper
            event['calendar'] = scraper.calendar
            event['link'] = venue.website
            # time = c.xpath('.//*/span[@class="vp-time"]/text()')[0].strip()
            date = c.xpath('.//h1[@class="sse-size-64"]/text()')[0]
            if len(date) > 1:
                print(date)
                year = datetime.now().year
                event_date = date + " " + str(year)
                event['title'] = c.xpath('.//p/span/b/text()')[0]
                details = c.xpath('.//p/text()')
                if 'Music' in details[-1]:
                    event_time = c.xpath('.//p/text()')[-1].split("Music")[1].strip()
                    event_type = "Mu"
                    event_dt = event_date + " " + event_time + " PM"
                    event['dateStamp'] = datetime.strptime(event_dt, DATETIME_FORMAT_night)
                elif len(details) == 1:
                    try:
                        event_time = details[0].split("-")[0].strip()
                        event_dt = event_date + " " + event_time + ' PM'
                        event['dateStamp'] = datetime.strptime(event_dt, DATETIME_FORMAT_night)
                        event_type = "Ot"
                    except Exception as e:
                        event_time = details[0].split("-")[0].strip()
                        event_dt = event_date + " " + event_time
                        event['dateStamp'] = datetime.strptime(event_dt, DATETIME_FORMAT_2)
                        event_type = "Ot"
                digitools.createBasicEvent(event, event_type, venue)
        except Exception as e:
            print(e)

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    quit()

ps = digitools.getSource(br, venue.website)
get_events(ps, event_type)
sleep(3)

br.close()

digitools.updateScraper(scraper, item_count_start)
30
events/scrapers/clean_up.py
Normal file
@@ -0,0 +1,30 @@
import re, os, sys
from datetime import datetime, timedelta

from django.db.models import Count

import django
sys.path.append('../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from events.models import Event, Organization

new_time = datetime.now() - timedelta(days=1)
right_bound_time = datetime.now() + timedelta(days=45)
events = Event.objects.filter(show_date__lte=new_time)
events1 = Event.objects.filter(show_date__gte=right_bound_time)

for e in events:
    e.delete()

for e in events1:
    e.delete()

org_sin_events = Organization.objects.annotate(num_events=Count('event')).filter(num_events__lt=1).filter(is_501c=False)

for org in org_sin_events:
    print(org)
    org.delete()

print("completed and cleaned scrapes")
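clean_up.py deletes matching rows one at a time. The same pruning can be expressed with Django's bulk queryset delete, which issues far fewer queries; a small equivalent sketch using only the models and filters already defined above:

    # Bulk-delete equivalents of the loops in clean_up.py.
    Event.objects.filter(show_date__lte=new_time).delete()
    Event.objects.filter(show_date__gte=right_bound_time).delete()
    Organization.objects.annotate(num_events=Count('event')) \
        .filter(num_events__lt=1, is_501c=False).delete()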
16
events/scrapers/run_govt.sh
Normal file
@@ -0,0 +1,16 @@
#!/bin/bash

BASEDIR=/home/canin/Downloads/DigiSnaxxEvents
DJANGODIR=/home/canin/Downloads/DigiSnaxxEvents/ds_events
EVENTDIR=/home/canin/Downloads/DigiSnaxxEvents/ds_events/event_scrapers
GOVTDIR=/home/canin/Downloads/DigiSnaxxEvents/ds_events/event_scrapers/Working/govt

cd $GOVTDIR
for file in *
do
    python "$file" $1
    echo "SCRIPT COMPLETE"
done

cd $EVENTDIR
python clean_up.py
16
events/scrapers/run_ical.sh
Normal file
@@ -0,0 +1,16 @@
#!/bin/bash

BASEDIR=/home/canin/Downloads/DigiSnaxxEvents
DJANGODIR=/home/canin/Downloads/DigiSnaxxEvents/ds_events
EVENTDIR=/home/canin/Downloads/DigiSnaxxEvents/ds_events/event_scrapers
ICALDIR=/home/canin/Downloads/DigiSnaxxEvents/ds_events/event_scrapers/Working/iCal

cd $ICALDIR
for file in *
do
    python "$file"
    echo "SCRIPT COMPLETE"
done

cd $EVENTDIR
python clean_up.py
21
events/scrapers/run_media_update.sh
Normal file
@@ -0,0 +1,21 @@
#!/bin/bash

# BASEDIR=/var/www/digisnaxx.com/
# DJANGODIR=/var/www/digisnaxx.com/ds_events
# EVENTDIR=/var/www/digisnaxx.com/ds_events/event_scrapers

ENVDIR=/home/canin/Downloads/DigiSnaxxEvents
DJANGODIR=/home/canin/Documents/repos/digisnaxx/ds_events
WORKMEDIADIR=/home/canin/Documents/repos/digisnaxx/ds_events/event_scrapers/Working/smedia

cd $ENVDIR
pwd
source venv/bin/activate

cd $WORKMEDIADIR

python bluesky.py
python bluesky_media.py
python redsky.py

deactivate
16
events/scrapers/run_news.sh
Normal file
@@ -0,0 +1,16 @@
#!/bin/bash

BASEDIR=/home/canin/Downloads/DigiSnaxxEvents
DJANGODIR=/home/canin/Downloads/DigiSnaxxEvents/ds_events
EVENTDIR=/home/canin/Downloads/DigiSnaxxEvents/ds_events/event_scrapers
NEWSDIR=/home/canin/Downloads/DigiSnaxxEvents/ds_events/event_scrapers/Working/news

cd $NEWSDIR
for file in *
do
    python "$file" $1
    echo "SCRIPT COMPLETE"
done

cd $EVENTDIR
python clean_up.py
50
events/scrapers/run_scrapers.sh
Normal file
@@ -0,0 +1,50 @@
#!/bin/bash

# BASEDIR=/var/www/digisnaxx.com/
# DJANGODIR=/var/www/digisnaxx.com/ds_events
# EVENTDIR=/var/www/digisnaxx.com/ds_events/event_scrapers

BASEDIR=/home/canin/Downloads/DigiSnaxxEvents
DJANGODIR=/home/canin/Downloads/DigiSnaxxEvents/ds_events
EVENTDIR=/home/canin/Downloads/DigiSnaxxEvents/ds_events/event_scrapers
VENUESDIR=/home/canin/Downloads/DigiSnaxxEvents/ds_events/event_scrapers/Working/venues
ICALDIR=/home/canin/Downloads/DigiSnaxxEvents/ds_events/event_scrapers/Working/iCal
GOVTDIR=/home/canin/Downloads/DigiSnaxxEvents/ds_events/event_scrapers/Working/govt

export DJANGO_SUPERUSER_EMAIL=canin@dreamfreely.org
export DJANGO_SUPERUSER_USERNAME=canin
export DJANGO_SUPERUSER_PASSWORD='hello123'

cd $BASEDIR
pwd
source venv/bin/activate

cd $DJANGODIR
pwd
mv db.sqlite3 db.sqlite3.bak
# rm ../db.sqlite3
touch db.sqlite3
python manage.py migrate
python manage.py createsuperuser --noinput
python manage.py loaddata events/fixtures/organizations.json
python manage.py loaddata events/fixtures/promo.json

cd $EVENTDIR
python start_up.py

bash run_venues.sh $1
bash run_ical.sh
bash run_govt.sh $1
bash run_news.sh $1

python Working/bluesky.py
python Working/redsky.py

cd $EVENTDIR
python clean_up.py

deactivate
bash run_media_update.sh

rm -rf ../*/__pycache__
rm -rf ../*/*/__pycache__
16
events/scrapers/run_venues.sh
Normal file
@@ -0,0 +1,16 @@
#!/bin/bash

BASEDIR=/home/canin/Downloads/DigiSnaxxEvents
DJANGODIR=/home/canin/Downloads/DigiSnaxxEvents/ds_events
EVENTDIR=/home/canin/Downloads/DigiSnaxxEvents/ds_events/event_scrapers
VENUESDIR=/home/canin/Downloads/DigiSnaxxEvents/ds_events/event_scrapers/Working/venues

cd $VENUESDIR
for file in *
do
    python "$file" $1
    echo "SCRIPT COMPLETE"
done

cd $EVENTDIR
python clean_up.py
23
events/scrapers/start_up.py
Normal file
@@ -0,0 +1,23 @@
import re, os, sys
from datetime import datetime, timedelta

import django
sys.path.append('../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from events.models import Organization, Promo, Calendar

venue, created = Organization.objects.get_or_create(name="DreamFreely",
    website="https://www.dreamfreely.org",
    city="St Paul",
    contact_name="Canin Carlos",
    contact_email="canin@dreamfreely.org",
    phone_number="6124054535")

print("Created DreamFreely:", created, venue)

calendar, created = Calendar.objects.get_or_create(name='Mpls-StP', shortcode='msp', desc='none')
calendar, created = Calendar.objects.get_or_create(name='Medellin', shortcode='mde', desc='none')
calendar, created = Calendar.objects.get_or_create(name='Global', shortcode='000', desc='none')
calendar, created = Calendar.objects.get_or_create(name='Online', shortcode='111', desc='none')
30
events/scrapers/zArchive/FaceBook/BillPull.py
Normal file
@@ -0,0 +1,30 @@
for sE in senateEvents[:5]:
    bills = sE.xpath('.//*/div[@class="mb-1"]/a/text()')
    bill_link = sE.xpath('.//*/div[@class="mb-1"]/a/@href')
    bill_items = zip(bills, bill_link)
    print(bills)
    for b, i in bill_items:
        if b.startswith("S.F."):
            print(b, i, "\n\n")


import os
from twilio.rest import Client

# Find your Account SID and Auth Token at twilio.com/console
# and set the environment variables. See http://twil.io/secure
account_sid = os.environ['TWILIO_ACCOUNT_SID']
auth_token = os.environ['TWILIO_AUTH_TOKEN']
client = Client(account_sid, auth_token)

message = client.messages \
    .create(
        body="Join Earth's mightiest heroes. Like Kevin Bacon.",
        from_='+15017122661',
        to='+15558675310'
    )

print(message.sid)
63
events/scrapers/zArchive/FaceBook/Mortimers.mpls.py
Normal file
@@ -0,0 +1,63 @@
import re, os, sys
from datetime import datetime

import django
sys.path.append('../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from events.models import Event, Organization

from pprint import pprint as ppr

from time import sleep
from pprint import pprint as ppr
from selenium import webdriver as wd
from selenium.webdriver.common.by import By

from xvfbwrapper import Xvfb
from lxml import html
import pytz

tz = pytz.timezone("US/Central")

DATETIME_FORMAT = '%a, %b %d %Y'
calendar_url = "https://www.facebook.com/mortimersmpls/events/"
current_year = str(datetime.now().year)

# Initiate and start the Browser
br = wd.Firefox()

br.get(calendar_url)
sleep(10)
br.find_element(By.XPATH, '//*/div[@class="x1i10hfl xjbqb8w x6umtig x1b1mbwd xaqea5y xav7gou x1ypdohk xe8uvvx xdj266r x11i5rnm xat24cr x1mh8g0r xexx8yu x4uap5 x18d9i69 xkhd6sd x16tdsg8 x1hl2dhg xggy1nq x1o1ewxj x3x9cwd x1e5q0jg x13rtm0m x87ps6o x1lku1pv x1a2a7pz x9f619 x3nfvp2 xdt5ytf xl56j7k x1n2onr6 xh8yej3"]').click()
print("Input Login Info")
sleep(30)

ps = html.fromstring(br.page_source)

listings = ps.xpath('.//*/div[@class="x9f619 x1n2onr6 x1ja2u2z x78zum5 x2lah0s x1qughib x6s0dn4 xozqiw3 x1q0g3np x1pi30zi x1swvt13 xsag5q8 xz9dl7a x1n0m28w xp7jhwk x1wsgfga x9otpla"]')

for l in listings:
    gT = l.xpath('.//*/span/text()')
    dateTime = gT[0]
    show_title = gT[1]
    link = l.xpath('.//*/a/@href')[0].split("?")[0] + " " + current_year
    print(show_title, dateTime, link)
    venue, created = Organization.objects.get_or_create(name="Mortimer's")
    try:
        new_event = Event.objects.update_or_create(
            event_type = 'Mu',
            show_title = show_title,
            show_link = link,
            show_date = datetime.strptime(dateTime.split(" AT")[0].strip(), DATETIME_FORMAT),
            venue = venue
        )
    except Exception as e:
        print(e, "\n\n++++\n\n")
        continue

br.close()
69
events/scrapers/zArchive/FaceBook/Mortimers.py
Normal file
@@ -0,0 +1,69 @@
import re, os, sys
from datetime import datetime

import django
sys.path.append('../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from events.models import Event, Organization

from pprint import pprint as ppr

from time import sleep
from pprint import pprint as ppr
from selenium import webdriver as wd
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By

from xvfbwrapper import Xvfb

import requests
from lxml import html

import pytz

tz = pytz.timezone("US/Central")

DATETIME_FORMAT = '%a, %b %d %Y'
# Set initial variables for City, etc
calendar_url = "https://www.facebook.com/mortimersmpls/events/"
current_year = str(datetime.now().year)

# Initiate and start the Browser
br = wd.Firefox()

br.get(calendar_url)
sleep(10)
br.find_element(By.XPATH, '//*/div[@class="x1i10hfl xjbqb8w x6umtig x1b1mbwd xaqea5y xav7gou x1ypdohk xe8uvvx xdj266r x11i5rnm xat24cr x1mh8g0r xexx8yu x4uap5 x18d9i69 xkhd6sd x16tdsg8 x1hl2dhg xggy1nq x1o1ewxj x3x9cwd x1e5q0jg x13rtm0m x87ps6o x1lku1pv x1a2a7pz x9f619 x3nfvp2 xdt5ytf xl56j7k x1n2onr6 xh8yej3"]').click()
print("Input Login Info")
sleep(30)

ps = html.fromstring(br.page_source)

listings = ps.xpath('.//*/div[@class="x9f619 x1n2onr6 x1ja2u2z x78zum5 x2lah0s x1qughib x6s0dn4 xozqiw3 x1q0g3np x1pi30zi x1swvt13 xsag5q8 xz9dl7a x1n0m28w xp7jhwk x1wsgfga x9otpla"]')

for l in listings:
    gT = l.xpath('.//*/span/text()')
    dateTime = gT[0]
    show_title = gT[1]
    link = l.xpath('.//*/a/@href')[0].split("?")[0] + " " + current_year
    print(show_title, dateTime, link)
    venue, created = Organization.objects.get_or_create(name="Mortimer's")
    try:
        new_event = Event.objects.update_or_create(
            event_type = 'Mu',
            show_title = show_title,
            show_link = link,
            show_date = datetime.strptime(dateTime.split(" AT")[0].strip(), DATETIME_FORMAT),
            venue = venue
        )
    except Exception as e:
        print(e, "\n\n++++\n\n")
        continue

br.close()
85
events/scrapers/zArchive/FaceBook/pillarforum.mpls.py
Normal file
@@ -0,0 +1,85 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta

import django
sys.path.append('../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
from selenium import webdriver as wd
from selenium.webdriver.common.by import By

from xvfbwrapper import Xvfb
from lxml import html
import pytz

from events.models import Event as DSEvent, Organization
from digitools import getBrowser, createURL, createBasicEvent, getSource


exit()

current_year = str(datetime.now().year)
venue, created = Organization.objects.get_or_create(
    name="Piller Forum",
    city="Minneapolis",
    website="https://www.pilllar.com/pages/events",
)

tz = pytz.timezone("US/Central")

DATETIME_FORMAT = '%B %d %I:%M%p %Y'
DATETIME_FORMAT = '%B %A %d %I:%M-%I:%M%p'

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = getBrowser(arg1)
    br.get(venue.website)
else:
    print("No run_env")
    quit()

try:
    br.find_element(By.XPATH, '//*[@class="privy-dismiss-content"]').click()
except Exception as e:
    print(e)
    pass

months = br.find_elements(By.XPATH, '//*[@class="sse-display"]')

for month in months:
    month_name = month.find_element(By.XPATH, './/*[@class="sse-size-28"]/u').text.capitalize()
    events = month.find_elements(By.XPATH, './/p')
    for event in events:
        e = {}
        eventTitle = event.text
        try:
            e['title'] = " ".join(eventTitle.split("-")[1].split(" ")[1:-2])
            if 'Music' in eventTitle:
                e['event_type'] = "Mu"
            elif 'The Growth Arc' in eventTitle:
                e['event_type'] = "Ot"
                e['dateTime'] = " ".join([month_name, date, "7:00pm", current_year])
                e['dateStamp'] = datetime.strptime(e['dateTime'], DATETIME_FORMAT)
                e['title'] = "The Growth Arc - Relationship Support Space"
                e['link'] = venue.website
            elif 'Event' in eventTitle:
                e['event_type'] = "Mu"
            else:
                e['event_type'] = "Ot"
            date = eventTitle.split(":")[0].split(" ")[1].replace("th", "").replace("nd", "").replace("rd", "").replace("st", "")
            time = eventTitle.split("-")[1].split(" ")[-2:][0]
            e['dateTime'] = " ".join([month_name, date, time, current_year])
            e['dateStamp'] = datetime.strptime(e['dateTime'], DATETIME_FORMAT)
            e['link'] = venue.website
            createBasicEvent(e, venue)
        except Exception as e:
            print("error ", eventTitle)
            print(e)

sleep(3)
# ppr(events)
br.close()
61
events/scrapers/zArchive/bluesky_scrape_old.py
Normal file
@@ -0,0 +1,61 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta

from atproto import Client

import django
sys.path.append('../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from socials.models import SocialLink
# from digitools import getBrowser, createURL, createBasicEvent, getSource

tz = pytz.timezone("US/Central")

USERNAME = "dreamfreely.org"
PASSWORD = "Futbol21!@"

client = Client()
client.login(USERNAME, PASSWORD)
feed = client.get_author_feed(USERNAME, limit=100)

def createSocialLink(post):
    new_post, created = SocialLink.objects.update_or_create(
        cid = post['link_id'],
        uri = post['uri'],
        text = post['text'],
        link = post['link'],
        handle = post['handle'],
        likes = post['likes'],
        reposts = post['reposts'],
        quotes = post['quotes'],
        replies = post['replies'],
        created_at = post['created_at']
    )
    print(created, new_post)

for post in feed.feed:
    post = post.post
    if hasattr(post.record.embed, 'external'):
        p = {}
        p['link'] = post.record.embed.external.uri.split("?")[0]
        p['text'] = " ".join(post.record.text.split("\n")[:2])
        p['handle'] = post.author.handle
        p['link_id'] = post.uri.split("feed.post/")[-1]
        p['uri'] = post.uri
        p['likes'] = post.like_count
        p['quotes'] = post.quote_count
        p['replies'] = post.reply_count
        p['reposts'] = post.repost_count
        p['created_at'] = post.record.created_at

        try:
            createSocialLink(p)
        except Exception as e:
            print(e)
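The Bluesky login in bluesky_scrape_old.py is hard-coded in the source. In practice the handle and app password would normally be read from the environment instead; a minimal sketch, where BLUESKY_HANDLE and BLUESKY_APP_PASSWORD are hypothetical variable names, not ones defined anywhere in this commit:

    import os
    from atproto import Client

    # Hypothetical env var names -- use whatever the deployment actually sets.
    USERNAME = os.environ["BLUESKY_HANDLE"]
    PASSWORD = os.environ["BLUESKY_APP_PASSWORD"]

    client = Client()
    client.login(USERNAME, PASSWORD)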
82
events/scrapers/zArchive/broken/BirchbarkBooks.py
Normal file
@@ -0,0 +1,82 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta

import django
sys.path.append('../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr

from events.models import Event as DSEvent, Organization
from digitools import getBrowser, createURL, createBasicEvent, getSource

current_year = str(datetime.now().year)

venue, created = Organization.objects.get_or_create(
    name="Birchbark Books",
    city="Minneapolis",
    website="https://birchbarkbooks.com/pages/events",
)

DATETIME_FORMAT = '%A, %B %d @ %I:%M%p %Y'
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'
DATETIME_FORMAT_3 = '%A, %B %d at %I:%M%p %Y'
DATETIME_FORMAT_4 = '%A, %B %d at %I%p %Y'
DATETIME_FORMAT_5 = '%A, %B %d @%I%p %Y'

def get_events(ps):
    contents = ps.xpath('.//*/div[@class="feature-row"]')
    # ppr("contents:", contents)
    for c in contents:
        try:
            event = {}
            event['title'] = c.xpath('.//*/p[@class="h3"]/text()')[0].strip()
            event['link'] = "https://birchbarkbooks.com/pages/events"
            event['date'] = c.xpath('.//*/p[@class="accent-subtitle"]/text()')[0].replace("Central", "") + " " + current_year
            event['date_num'] = event['date'].split(" ")[2].replace("th", "").replace("st", "").replace("rd", "").replace("nd", "")
            event['date'] = event['date'].split(" ")
            event['date'][2] = event['date_num']
            event['date'] = " ".join(event['date'])
            event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
            createBasicEvent(event, "Ed", venue)
        except Exception as e:
            try:
                print(e)
                event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT_2)
                createBasicEvent(event, "Ed", venue)
                print("\n\n+++\n\n")
            except Exception as e:
                try:
                    print(e)
                    event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT_3)
                    createBasicEvent(event, "Ed", venue)
                    print("\n\n+++\n\n")
                except Exception as e:
                    try:
                        print(e)
                        event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT_4)
                        createBasicEvent(event, "Ed", venue)
                        print("\n\n+++\n\n")
                    except Exception as e:
                        print(e)
                        event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT_5)
                        createBasicEvent(event, "Ed", venue)
                        print("\n\n+++\n\n")

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = getBrowser(arg1)
else:
    print("No run_env")
    quit()

calendar_url = 'https://birchbarkbooks.com/pages/events'

ps = getSource(br, calendar_url)
get_events(ps)

# ppr(events)
br.close()
30
events/scrapers/zArchive/broken/FaceBook/BillPull.py
Normal file
@@ -0,0 +1,30 @@
for sE in senateEvents[:5]:
    bills = sE.xpath('.//*/div[@class="mb-1"]/a/text()')
    bill_link = sE.xpath('.//*/div[@class="mb-1"]/a/@href')
    bill_items = zip(bills, bill_link)
    print(bills)
    for b, i in bill_items:
        if b.startswith("S.F."):
            print(b, i, "\n\n")


import os
from twilio.rest import Client

# Find your Account SID and Auth Token at twilio.com/console
# and set the environment variables. See http://twil.io/secure
account_sid = os.environ['TWILIO_ACCOUNT_SID']
auth_token = os.environ['TWILIO_AUTH_TOKEN']
client = Client(account_sid, auth_token)

message = client.messages \
    .create(
        body="Join Earth's mightiest heroes. Like Kevin Bacon.",
        from_='+15017122661',
        to='+15558675310'
    )

print(message.sid)
63
events/scrapers/zArchive/broken/FaceBook/Mortimers.mpls.py
Normal file
@@ -0,0 +1,63 @@
import re, os, sys
from datetime import datetime

import django
sys.path.append('../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from events.models import Event, Organization

from pprint import pprint as ppr

from time import sleep
from pprint import pprint as ppr
from selenium import webdriver as wd
from selenium.webdriver.common.by import By

from xvfbwrapper import Xvfb
from lxml import html
import pytz

tz = pytz.timezone("US/Central")

DATETIME_FORMAT = '%a, %b %d %Y'
calendar_url = "https://www.facebook.com/mortimersmpls/events/"
current_year = str(datetime.now().year)

# Initiate and start the Browser
br = wd.Firefox()

br.get(calendar_url)
sleep(10)
br.find_element(By.XPATH, '//*/div[@class="x1i10hfl xjbqb8w x6umtig x1b1mbwd xaqea5y xav7gou x1ypdohk xe8uvvx xdj266r x11i5rnm xat24cr x1mh8g0r xexx8yu x4uap5 x18d9i69 xkhd6sd x16tdsg8 x1hl2dhg xggy1nq x1o1ewxj x3x9cwd x1e5q0jg x13rtm0m x87ps6o x1lku1pv x1a2a7pz x9f619 x3nfvp2 xdt5ytf xl56j7k x1n2onr6 xh8yej3"]').click()
print("Input Login Info")
sleep(30)

ps = html.fromstring(br.page_source)

listings = ps.xpath('.//*/div[@class="x9f619 x1n2onr6 x1ja2u2z x78zum5 x2lah0s x1qughib x6s0dn4 xozqiw3 x1q0g3np x1pi30zi x1swvt13 xsag5q8 xz9dl7a x1n0m28w xp7jhwk x1wsgfga x9otpla"]')

for l in listings:
    gT = l.xpath('.//*/span/text()')
    dateTime = gT[0]
    show_title = gT[1]
    link = l.xpath('.//*/a/@href')[0].split("?")[0] + " " + current_year
    print(show_title, dateTime, link)
    venue, created = Organization.objects.get_or_create(name="Mortimer's")
    try:
        new_event = Event.objects.update_or_create(
            event_type = 'Mu',
            show_title = show_title,
            show_link = link,
            show_date = datetime.strptime(dateTime.split(" AT")[0].strip(), DATETIME_FORMAT),
            venue = venue
        )
    except Exception as e:
        print(e, "\n\n++++\n\n")
        continue

br.close()
69
events/scrapers/zArchive/broken/FaceBook/Mortimers.py
Normal file
@@ -0,0 +1,69 @@
import re, os, sys
from datetime import datetime

import django
sys.path.append('../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from events.models import Event, Organization

from pprint import pprint as ppr

from time import sleep
from pprint import pprint as ppr
from selenium import webdriver as wd
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By

from xvfbwrapper import Xvfb

import requests
from lxml import html

import pytz

tz = pytz.timezone("US/Central")

DATETIME_FORMAT = '%a, %b %d %Y'
# Set initial variables for City, etc
calendar_url = "https://www.facebook.com/mortimersmpls/events/"
current_year = str(datetime.now().year)

# Initiate and start the Browser
br = wd.Firefox()

br.get(calendar_url)
sleep(10)
br.find_element(By.XPATH, '//*/div[@class="x1i10hfl xjbqb8w x6umtig x1b1mbwd xaqea5y xav7gou x1ypdohk xe8uvvx xdj266r x11i5rnm xat24cr x1mh8g0r xexx8yu x4uap5 x18d9i69 xkhd6sd x16tdsg8 x1hl2dhg xggy1nq x1o1ewxj x3x9cwd x1e5q0jg x13rtm0m x87ps6o x1lku1pv x1a2a7pz x9f619 x3nfvp2 xdt5ytf xl56j7k x1n2onr6 xh8yej3"]').click()
print("Input Login Info")
sleep(30)

ps = html.fromstring(br.page_source)

listings = ps.xpath('.//*/div[@class="x9f619 x1n2onr6 x1ja2u2z x78zum5 x2lah0s x1qughib x6s0dn4 xozqiw3 x1q0g3np x1pi30zi x1swvt13 xsag5q8 xz9dl7a x1n0m28w xp7jhwk x1wsgfga x9otpla"]')

for l in listings:
    gT = l.xpath('.//*/span/text()')
    dateTime = gT[0]
    show_title = gT[1]
    link = l.xpath('.//*/a/@href')[0].split("?")[0] + " " + current_year
    print(show_title, dateTime, link)
    venue, created = Organization.objects.get_or_create(name="Mortimer's")
    try:
        new_event = Event.objects.update_or_create(
            event_type = 'Mu',
            show_title = show_title,
            show_link = link,
            show_date = datetime.strptime(dateTime.split(" AT")[0].strip(), DATETIME_FORMAT),
            venue = venue
        )
    except Exception as e:
        print(e, "\n\n++++\n\n")
        continue

br.close()
Binary file not shown.
72
events/scrapers/zArchive/broken/acadia.py
Normal file
@@ -0,0 +1,72 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta

import django
sys.path.append('../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr

from events.models import Event as DSEvent, Organization
from digitools import getBrowser, createDashURL, createBasicEvent, getSource

venue, created = Organization.objects.get_or_create(
    name="Acadia Cafe",
    city="Minneapolis",
    website="https://acadiacafe.com",
)

calendar_url = "https://www.acadiacafe.com/events"

DATETIME_FORMAT = '%d %m %Y %I:%M%p'

def get_events(ps, link):
    contents = ps.xpath('.//*/div[@class="cl-view-month__day__event__title"]')
    print(contents)
    quit()

    for c in contents:
        try:
            day_num = c.xpath('.//*/div[@class="marker-daynum"]/text()')[0]
            events = c.xpath('.//*/li')
            # print(events)
            for e in events:
                event = {}
                event['month'] = link.split("month=")[1].split("-")[0]
                event['year'] = link.split("month=")[1].split("-")[1]
                event['title'] = e.xpath('.//h1/a[@class="flyoutitem-link"]/text()')
                event['time'] = e.xpath('.//div[@class="flyoutitem-datetime flyoutitem-datetime--12hr"]/text()')
                event['link'] = e.xpath('.//a/@href')[0]
                event['date'] = str(day_num) + ' ' + 'time'
                # event['dateStamp'] = datetime.strptime(dateStamp, DATETIME_FORMAT)
                if len(event['title']):
                    nevent = {}
                    nevent['title'] = "".join(event['title']).strip()
                    event['time'] = event['time'][0].strip().split(" –")[0]
                    nevent['link'] = "https://palmers-bar.com" + e.xpath('.//a/@href')[0]
                    event['dateStamp'] = str(day_num) + ' ' + event['month'] + ' ' + event['year'] + ' ' + event['time']
                    nevent['dateStamp'] = datetime.strptime(event['dateStamp'], DATETIME_FORMAT)
                    createBasicEvent(nevent, 'Mu', venue)
        except Exception as e:
            continue

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = getBrowser(arg1)
else:
    print("No run_env")
    quit()


ps = getSource(br, calendar_url)
sleep(5)
get_events(ps, calendar_url)
sleep(5)

# ppr(events)
br.close()
64
events/scrapers/zArchive/broken/cedar.mpls.py
Normal file
@@ -0,0 +1,64 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta

import django
sys.path.append('../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
from selenium import webdriver as wd

from xvfbwrapper import Xvfb
from lxml import html
import pytz

from events.models import Event as DSEvent, Organization
from digitools import getBrowser, createBasicEvent, getSource

venue, created = Organization.objects.get_or_create(
    name="Cedar Cultural Center",
    city="Minneapolis",
    website="https://www.thecedar.org/listing",
)

tz = pytz.timezone("US/Central")

DATETIME_FORMAT = '%A, %B %d, %Y %I:%M %p'
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'
DATETIME_FORMAT_3 = '%A, %B %d at %I:%M%p %Y'
DATETIME_FORMAT_4 = '%A, %B %d at %I%p %Y'
DATETIME_FORMAT_5 = '%A, %B %d @%I%p %Y'

def get_events(ps):
    links = ps.xpath('.//*/div[@class="summary-title"]/a/@href')
    # ppr("contents:", contents)
    for l in links:
        br.get("https://www.thecedar.org" + l)
        sleep(3)
        pse = html.fromstring(br.page_source)
        event = {}
        time = pse.xpath('.//*/time[@class="event-time-12hr-start"]/text()')[0]
        date = pse.xpath('.//*/time[@class="event-date"]/text()')[0]
        event['title'] = pse.xpath('.//*/h1[@class="eventitem-title"]/text()')[0]
        dateStamp = date + " " + time
        event['dateStamp'] = datetime.strptime(dateStamp, DATETIME_FORMAT)
        event['link'] = "https://www.thecedar.org" + l
        createBasicEvent(event, "Mu", venue)

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = getBrowser(arg1)
else:
    print("No run_env")
    quit()


calendar_url = 'https://www.thecedar.org/listing'
ps = getSource(br, calendar_url)
get_events(ps)
# ppr(events)
br.close()
117
events/scrapers/zArchive/broken/digitools.py
Normal file
@@ -0,0 +1,117 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta
from time import sleep
import pytz
from lxml import html

import django
sys.path.append('../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from xvfbwrapper import Xvfb
from selenium import webdriver as wd

from events.models import Event as DSEvent, Organization

tz = pytz.timezone("US/Central")
td = relativedelta.relativedelta(months=1)
odt = datetime.now() + td


def getSource(browser, link):
    browser.get(link)
    sleep(5)
    ps = html.fromstring(browser.page_source)
    return ps

def getBrowser(run_env):
    if run_env == 'dev':
        print("Chrome is a go!")
        # chromeOptions = wd.ChromeOptions()
        # chromeOptions.binary_location = "/Application/Google\ Chrome.app"
        # chromeDriver = "/opt/homebrew/bin/chromedriver"
        # br = wd.Chrome(chromeDriver, options=chromeOptions)
        br = wd.Chrome()
        return br
    elif run_env == "def":
        print("Firefox go vroom")
        br = wd.Firefox()
        return br
    elif run_env == "prod":
        start_cmd = "Xvfb :91 && export DISPLAY=:91 &"
        xvfb = Xvfb()
        os.system(start_cmd)
        xvfb.start()
        print("started Xvfb")
        br = wd.Firefox()
        return br
    else:
        print("Failed", sys.argv, run_env)
        quit()

def createBasicURL(site_url):
    month = datetime.now().month
    next_month = odt.month
    year = datetime.now().year
    print(month, next_month, year)
    links = [
        site_url + str(month) + "/" + str(year),
        site_url + str(next_month) + "/" + str(year)
    ]
    print(links)
    return links

def createURL(site_url):
    month = datetime.now().month
    if month < 10:
        month = "0" + str(month)
    else:
        month = str(month)
    next_month = odt.month
    if next_month < 10:
        next_month = "0" + str(next_month)
    else:
        next_month = str(next_month)
    year = datetime.now().year
    links = [
        site_url + str(year) + "/" + month,
    ]
    if next_month == "01":
        links.append(site_url + str(int(year)+1) + "/" + next_month)
    else:
        links.append(site_url + str(year) + "/" + next_month)
    print(links)
    return links

def createDashURL(site_url):
    month = datetime.now().month
    if month < 10:
        month = "0" + str(month)
    else:
        month = str(month)
    next_month = odt.month
    if next_month < 10:
        next_month = "0" + str(next_month)
    else:
        next_month = str(next_month)
    year = datetime.now().year
    print(month, next_month, year)
    links = [
        site_url + month + "-" + str(year),
        site_url + next_month + "-" + str(year)
    ]
    print(links)
    return links

def createBasicEvent(event, event_type, venue):
    new_event, created = DSEvent.objects.update_or_create(
        event_type = event_type,
        show_title = event['title'],
        show_link = event['link'],
        show_date = event['dateStamp'],
        show_day = event['dateStamp'],
        venue = venue
    )
    print("New Event: ", new_event)
139
events/scrapers/zArchive/broken/ical.TriviaMafia.py.bak
Normal file
@@ -0,0 +1,139 @@
import requests, os, sys
from icalendar import Calendar as iCalendar, Event

from datetime import datetime
from dateutil import relativedelta
td = relativedelta.relativedelta(hours=5)

from pprint import pprint as ppr
import pytz

import django
sys.path.append('../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from events.models import Event as DSEvent, Organization
from dateutil import relativedelta


def createEvent(event, Organization, event_type):
    new_event, created = DSEvent.objects.update_or_create(
        event_type = event_type,
        show_title = event['strSummary'],
        show_link = venue.website,
        show_date = event['dateStart'] - td,
        show_day = event['dateStart'] - td,
        more_details = event["details"],
        venue = venue
    )
    return new_event, created


def createVenue(event):
    venue, created = Organization.objects.get_or_create(
        name = event['venue'],
        address = event['address'],
        city = event['city'],
    )
    return venue, created

td = relativedelta.relativedelta(hours=5)


event_type = "Ot"

calendar_url = 'https://calendar.google.com/calendar/ical/c_g1i6cbb3glhu6or0hu8kemah7k%40group.calendar.google.com/public/basic.ics'

objIcalData = requests.get(calendar_url)

gcal = iCalendar.from_ical(objIcalData.text)

cfpa_events = []
tz = pytz.timezone("US/Central")

for component in gcal.walk():
    event = {}
    event['strSummary'] = f"{(component.get('SUMMARY'))}"
    event['strDesc'] = component.get('DESCRIPTION')
    event['strLocation'] = component.get('LOCATION')
    event['dateStart'] = component.get('DTSTART')
    event['dateStamp'] = component.get('DTSTAMP')
    event['RepeatRule'] = component.get('RRULE')
    if event['dateStamp'] is not None:
        event['dateStamp'] = event['dateStamp'].dt
    if event['dateStart'] is not None:
        try:
            event['dateStart'] = event['dateStart'].dt
        except Exception as e:
            event['dateStart'] = event['dateStart'].dt
    event['dateEnd'] = (component.get('DTEND'))
    if event['dateEnd'] is not None:
        event['dateEnd'] = event['dateEnd'].dt
    else:
        event['dateEnd'] = event['dateStart']
    if event['strSummary'] != 'None':
        event['details'] = {
            "description" : event['strDesc'],
            "Location" : event['strLocation'],
        }
        try:
            event['venue'] = event['strLocation'].split(",")[0].strip()
            event['address'] = event['strLocation'].split(",")[1].strip()
            event['city'] = event['strLocation'].split(",")[2].strip()
            try:
                event['state'] = event['strLocation'].split(",")[3].split(' ')[1]
                event['zip'] = event['strLocation'].split(",")[3].split(' ')[2]
            except Exception as error:
                pass
        except Exception as error:
            pass
        cfpa_events.append(event)
        # print(event)
        now_now = datetime.now().astimezone(pytz.utc)
        try:
            if event['dateStart'] > now_now:
                if not event['address'][0].isdigit():
                    continue
                venue, created = createVenue(event)
                new_event, created = createEvent(event, venue, event_type)
                print(new_event)
        except Exception as e:
            try:
                if event['dateStart'] > now_now.date():
                    if not event['address'][0].isdigit():
                        continue
                    venue, created = createVenue(event)
                    new_event, created = createEvent(event, venue, event_type)
                    print(new_event)
            except Exception as e:
                ppr(event)
                print(e)


# new_events = []
# for event in cfpa_events:
#     now_now = datetime.now().astimezone(pytz.utc)
#     try:
#         if event['dateStart'] > now_now:
#             new_events.append(event)
#     except Exception as e:
#         try:
#             if event['dateStart'] > now_now.date():
#                 new_events.append(event)
#         except Exception as e:
#             print(e)
#             ppr(event)


# {'dateEnd': datetime.datetime(2022, 10, 22, 18, 30, tzinfo=<UTC>),
#  'dateStamp': datetime.datetime(2023, 3, 23, 1, 57, 45, tzinfo=<UTC>),
#  'dateStart': datetime.datetime(2022, 10, 22, 17, 30, tzinfo=<UTC>),
#  'details': {'DateTime': datetime.datetime(2022, 10, 22, 17, 30, tzinfo=<UTC>),
#              'Location': vText('b'''),
#              'description': None},
#  'strDesc': None,
#  'strLocation': vText('b'''),
#  'strSummary': 'Nia Class with Beth Giles'}
77
events/scrapers/zArchive/broken/palmers.mpls.py
Normal file
@@ -0,0 +1,77 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta

import django
sys.path.append('../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr

from events.models import Event as DSEvent, Organization
from digitools import getBrowser, createDashURL, createBasicEvent, getSource

venue, created = Organization.objects.get_or_create(
    name="Palmer's Bar",
    city="Minneapolis",
    website="https://palmers-bar.com",
)

calendar_url = "https://palmers-bar.com"

DATETIME_FORMAT = '%d %m %Y %I:%M%p'

def get_events(ps, link):
    contents = ps.xpath('.//*/td')
    for c in contents:
        try:
            # day_num = c.xpath('.//*/div[@class="marker-daynum"]/text()')[0]
            events = c.xpath('.//*/li')
            # print(events)
            for e in events:
                event = {}
                event_link = calendar_url + e.xpath('.//a/@href')[0]
                ps = getSource(br, event_link)
                new_event = ps.xpath('.//*/h1[@class="eventitem-column-meta"]')
                event['title'] = new_event.xpath('.//*/h1[@class="event-title"]')
                event['date'] = new_event.xpath('.//*/time[@class="event-date"]')
                event['time'] = new_event.xpath('.//*/time[@class="event-time-12hr-start"]')
                event['link'] = event_link

                # event['month'] = link.split("month=")[1].split("-")[0]
                # event['year'] = link.split("month=")[1].split("-")[1]
                # event['title'] = e.xpath('.//h1/a[@class="flyoutitem-link"]/text()')
                # event['time'] = e.xpath('.//div[@class="flyoutitem-datetime flyoutitem-datetime--12hr"]/text()')
                # event['date'] = str(day_num) + ' ' + 'time'
                # event['dateStamp'] = datetime.strptime(dateStamp, DATETIME_FORMAT)
                ppr(event)
                if len(event['title']):
                    nevent = {}
                    nevent['title'] = "".join(event['title']).strip()
                    event['time'] = event['time'][0].strip().split(" –")[0]
                    nevent['link'] = "https://palmers-bar.com" + e.xpath('.//a/@href')[0]
                    event['dateStamp'] = str(day_num) + ' ' + event['month'] + ' ' + event['year'] + ' ' + event['time']
                    nevent['dateStamp'] = datetime.strptime(event['dateStamp'], DATETIME_FORMAT)
                    ppr(nevent)
                    # createBasicEvent(nevent, 'Mu', venue)
        except Exception as e:
            continue

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = getBrowser(arg1)
else:
    print("No run_env")
    quit()

links = createDashURL("https://palmers-bar.com/?view=calendar&month=")

for link in links:
    ps = getSource(br, link)
    get_events(ps, link)
    sleep(5)

# ppr(events)
br.close()
132
events/scrapers/zArchive/digitools_old.py
Normal file
@@ -0,0 +1,132 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta
from time import sleep
import pytz
from lxml import html

import django
sys.path.append('../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from xvfbwrapper import Xvfb
from selenium import webdriver as wd

from events.models import Event as DSEvent, Organization

tz = pytz.timezone("US/Central")
td = relativedelta.relativedelta(months=1)
odt = datetime.now() + td


def getSource(browser, link):
    browser.get(link)
    sleep(3)
    ps = html.fromstring(browser.page_source)
    return ps

def getBrowser(run_env):
    if run_env == 'dev':
        print("Chrome is a go!")
        # chromeOptions = wd.ChromeOptions()
        # chromeOptions.binary_location = "/Application/Google\ Chrome.app"
        # chromeDriver = "/opt/homebrew/bin/chromedriver"
        # br = wd.Chrome(chromeDriver, options=chromeOptions)
        br = wd.Chrome()
        return br
    elif run_env == "def":
        print("Firefox go vroom")
        br = wd.Firefox()
        return br
    elif run_env == "prod":
        start_cmd = "Xvfb :91 && export DISPLAY=:91 &"
        xvfb = Xvfb()
        os.system(start_cmd)
        xvfb.start()
        print("started Xvfb")
        br = wd.Firefox()
        return br
    else:
        print("Failed", sys.argv, run_env)
        quit()

def createBasicURL(site_url):
    month = datetime.now().month
    next_month = odt.month
    year = datetime.now().year
    print(month, next_month, year)
    links = [
        site_url + str(month) + "/" + str(year),
        site_url + str(next_month) + "/" + str(year)
    ]
    print(links)
    return links

def createURLNoZero(site_url):
    month = datetime.now().month
    next_month = odt.month
    year = datetime.now().year
    links = [
        site_url + str(year) + "/" + month,
    ]
    if next_month == "1":
        links.append(site_url + str(int(year)+1) + "/" + next_month)
    else:
        links.append(site_url + str(year) + "/" + next_month)
    print(links)
    return links

def createURL(site_url):
    month = datetime.now().month
    if month < 10:
        month = "0" + str(month)
    else:
        month = str(month)
    next_month = odt.month
    if next_month < 10:
        next_month = "0" + str(next_month)
    else:
        next_month = str(next_month)
    year = datetime.now().year
    links = [
        site_url + str(year) + "/" + month,
    ]
    if next_month == "01":
        links.append(site_url + str(int(year)+1) + "/" + next_month)
    else:
        links.append(site_url + str(year) + "/" + next_month)
    print(links)
    return links

def createDashURL(site_url):
    month = datetime.now().month
    if month < 10:
        month = "0" + str(month)
    else:
        month = str(month)
    next_month = odt.month
    if next_month < 10:
        next_month = "0" + str(next_month)
    else:
        next_month = str(next_month)
    year = datetime.now().year
    print(month, next_month, year)
    links = [
        site_url + month + "-" + str(year),
        site_url + next_month + "-" + str(year)
    ]
    print(links)
    return links

def createBasicEvent(event, event_type, venue):
    new_event, created = DSEvent.objects.update_or_create(
        calendar = event['calendar'],
        event_type = event_type,
        show_title = event['title'],
        show_link = event['link'],
        show_date = event['dateStamp'],
        show_day = event['dateStamp'],
        venue = venue
    )
    print("New Event: ", new_event)