more scrapers

This commit is contained in:
2026-01-24 19:01:47 -05:00
parent 7013d8327a
commit 3c4a41ae2c
58 changed files with 1614 additions and 2988 deletions

66
Working/govt/co.denver.py Normal file
View File

@@ -0,0 +1,66 @@
# Scraper for Denver City Council meetings (denver.legistar.com) -> 'dia' calendar.
import os, sys
from datetime import datetime
from dateutil import relativedelta  # NOTE(review): unused in this script — confirm
# Make the project's scraper bootstrap importable before Django models load.
sys.path.append('/var/www/digisnaxx.ado/scrapers')
import dtss
dtss.getReady()  # bootstraps the Django environment (project helper)
from time import sleep
from pprint import pprint as ppr
import pytz
from events.models import Organization, Scraper
import events.digitools as digitools

# Ensure the venue row exists (the `created` flag is not used afterwards).
venue, created = Organization.objects.get_or_create(
    name="Denver City Council",
    city="Denver",
    website="https://denver.legistar.com/Calendar.aspx",
    is_venue=True
)
scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'dia')
event_type = "Gv"  # government-event type code
# Time Signatures
# Fixed offset appended to scraped date strings so strptime's %z %Z yields an
# aware datetime. NOTE(review): -0600 is Denver's DST offset; ignores shifts.
tz_str = " -0600 UTC"
DATETIME_FORMAT = '%m/%d/%Y %I:%M %p %z %Z'
DATETIME_FORMAT_2 = '%B %d, %Y'  # NOTE(review): unused in this script — confirm
def get_events(ps, event_type):
    """Parse Legistar calendar rows from the page source and store each one."""
    print("Getting events ...")
    rows = ps.xpath('.//*/tr[@class="rgRow"]')
    for row in rows:
        try:
            event = {}
            event['scraper'] = scraper
            event['link'] = venue.website
            event['calendars'] = [scraper.calendar]
            event['title'] = row.xpath('.//*/a/text()')[0]
            event['time'] = row.xpath('.//*/span/text()')[2]
            event['date'] = row.xpath('.//td[@class="rgSorted"]/text()')[0]
            # Stitch date + time + fixed offset into one strptime-parseable stamp.
            stamp_text = "{} {}{}".format(event['date'], event['time'], tz_str)
            event['dateStamp'] = datetime.strptime(stamp_text, DATETIME_FORMAT)
            ppr(event)
            digitools.createBasicEvent(event, event_type, venue)
            scraper.items += 1
        except Exception as err:
            # Best-effort: log the failure and whatever was parsed, keep going.
            print(err)
            ppr(event)
            print("\n\n+++\n\n")
# Entry point: first CLI argument selects the browser / run environment.
if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    quit()
ps = digitools.getSource(br, venue.website)
sleep(1)
get_events(ps, "Gv")  # "Gv" = government event type
sleep(2)
# Persist the new item count delta for this run.
digitools.updateScraper(scraper, item_count_start)
br.close()

View File

@@ -20,13 +20,13 @@ venue, created = Organization.objects.get_or_create(
is_venue=True is_venue=True
) )
scraper,item_count_start,virtcal = digitools.getScraper(venue, 'bos') scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'bos')
event_type = "Gv" event_type = "Gv"
# Time Signatures # Time Signatures
tz = pytz.timezone("US/Eastern") tz_str = " -0500 UTC"
DATETIME_FORMAT = '%B %d, %Y %I:%M%p' DATETIME_FORMAT = '%B %d, %Y %I:%M%p %z %Z'
def get_events(ps, event_type): def get_events(ps, event_type):
print("Getting events ...") print("Getting events ...")
@@ -40,7 +40,7 @@ def get_events(ps, event_type):
event['title'] = c.xpath('.//*/div[@class="n-li-t"]/a/text()')[0] event['title'] = c.xpath('.//*/div[@class="n-li-t"]/a/text()')[0]
event['link'] = venue.website + c.xpath('.//*/div[@class="n-li-t"]/a/@href')[0] event['link'] = venue.website + c.xpath('.//*/div[@class="n-li-t"]/a/@href')[0]
event['date'] = c.xpath('.//*/li[@class="dl-i"]/span[@class="dl-d"]/text()')[0].replace('\n', '').split("-")[0].strip() event['date'] = c.xpath('.//*/li[@class="dl-i"]/span[@class="dl-d"]/text()')[0].replace('\n', '').split("-")[0].strip()
event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT) event['dateStamp'] = datetime.strptime(event['date'] + tz_str, DATETIME_FORMAT)
digitools.createBasicEvent(event, event_type, venue) digitools.createBasicEvent(event, event_type, venue)
ppr(event) ppr(event)
scraper.items+=1 scraper.items+=1

View File

@@ -35,8 +35,8 @@ except Exception as e:
print("Scraper: ", scraper) print("Scraper: ", scraper)
tz = pytz.timezone("US/Central") tz = pytz.timezone("US/Central")
tz_str = " -0600 UTC"
DATETIME_FORMAT = '%A, %B %d, %Y %I:%M %p' DATETIME_FORMAT = '%A, %B %d, %Y %I:%M %p %z %Z'
# Set initial variables for City, etc # Set initial variables for City, etc
calendar_url = 'https://www.leg.mn.gov/cal?type=all' calendar_url = 'https://www.leg.mn.gov/cal?type=all'
@@ -76,8 +76,8 @@ for hE in houseEvents:
show_title = title, show_title = title,
show_link = link, show_link = link,
show_date = datetime.strptime(dateTime, DATETIME_FORMAT), show_date = datetime.strptime(dateTime, DATETIME_FORMAT),
show_day = datetime.strptime(dateTime, DATETIME_FORMAT).date(), show_day = datetime.strptime(dateTime + tz_str, DATETIME_FORMAT).date(),
more_details = details['location'], # more_details = details['location'],
venue = venue, venue = venue,
scraper = scraper scraper = scraper
) )
@@ -113,8 +113,8 @@ for sE in senateEvents:
show_title = title, show_title = title,
show_link = link, show_link = link,
show_date = datetime.strptime(dateTime, DATETIME_FORMAT), show_date = datetime.strptime(dateTime, DATETIME_FORMAT),
show_day = datetime.strptime(dateTime, DATETIME_FORMAT).date(), show_day = datetime.strptime(dateTime + tz_str, DATETIME_FORMAT).date(),
more_details = details['location'], # more_details = details['location'],
venue = venue, venue = venue,
scraper = scraper scraper = scraper
) )
@@ -147,8 +147,8 @@ for cE in commEvents:
show_title = title, show_title = title,
show_link = link, show_link = link,
show_date = datetime.strptime(dateTime, DATETIME_FORMAT), show_date = datetime.strptime(dateTime, DATETIME_FORMAT),
show_day = datetime.strptime(dateTime, DATETIME_FORMAT).date(), show_day = datetime.strptime(dateTime + tz_str, DATETIME_FORMAT).date(),
more_details = details['location'], # more_details = details['location'],
venue = venue, venue = venue,
scraper = scraper scraper = scraper
) )

View File

@@ -49,6 +49,8 @@ gcal = iCalendar.from_ical(objIcalData.text)
cfpa_events = [] cfpa_events = []
tz = pytz.timezone("US/Central") tz = pytz.timezone("US/Central")
tz_str = " -0600 UTC"
for component in gcal.walk(): for component in gcal.walk():
event = {} event = {}
@@ -84,8 +86,8 @@ for component in gcal.walk():
event_type = event_type, event_type = event_type,
show_title = event['strSummary'], show_title = event['strSummary'],
show_link = venue.website, show_link = venue.website,
show_date = event['dateStart']-td, show_date = event['dateStart'],
show_day = event['dateStart']-td, show_day = event['dateStart'],
more_details = event["details"], more_details = event["details"],
venue = venue venue = venue
) )

View File

@@ -36,11 +36,15 @@ except Exception as e:
scraper = Scraper.objects.get(name="Mpls City Council") scraper = Scraper.objects.get(name="Mpls City Council")
print("Scraper: ", scraper) print("Scraper: ", scraper)
DATETIME_FORMAT = '%Y-%m-%dT%H:%M:%S' DATETIME_FORMAT = '%Y-%m-%dT%H:%M:%S %z %Z'
tz = pytz.timezone("US/Central") tz = pytz.timezone("US/Central")
td = timedelta(days=2) td = timedelta(days=2)
odt = datetime.now() - td odt = datetime.now() - td
tz_str = " -0600 UTC"
month = odt.strftime("%b") month = odt.strftime("%b")
day = int(datetime.now().day) day = int(datetime.now().day)
@@ -74,7 +78,7 @@ for event in new_events:
e = {} e = {}
e['title'] = event['CommitteeName'] e['title'] = event['CommitteeName']
e['link'] = scraper.website e['link'] = scraper.website
e['dateStamp'] = datetime.strptime(event['MeetingTime'], DATETIME_FORMAT) e['dateStamp'] = datetime.strptime(event['MeetingTime'] + tz_str, DATETIME_FORMAT)
e['agendaStatus'] = event['AgendaStatus'] e['agendaStatus'] = event['AgendaStatus']
e['address'] = event['Address'] e['address'] = event['Address']
e['description'] = event['Description'] e['description'] = event['Description']

View File

@@ -32,8 +32,9 @@ except Exception as e:
print("Scraper: ", scraper) print("Scraper: ", scraper)
tz = pytz.timezone("US/Central") tz = pytz.timezone("US/Central")
tz_str = " -0600 UTC"
DATETIME_FORMAT = '%B %d, %Y at %I:%M %p' DATETIME_FORMAT = '%B %d, %Y at %I:%M %p %z %Z'
calendar_url = 'https://www.stpaul.gov/calendar' calendar_url = 'https://www.stpaul.gov/calendar'
city_site = "https://www.stpaul.gov" city_site = "https://www.stpaul.gov"
@@ -64,7 +65,7 @@ def getEvents(br):
event_type = 'Gv', event_type = 'Gv',
show_title = title, show_title = title,
show_link = link, show_link = link,
show_date = datetime.strptime(dateTime, DATETIME_FORMAT), show_date = datetime.strptime(dateTime + tz_str, DATETIME_FORMAT),
show_day = datetime.strptime(dateTime, DATETIME_FORMAT), show_day = datetime.strptime(dateTime, DATETIME_FORMAT),
venue = venue, venue = venue,
scraper = scraper scraper = scraper

View File

@@ -20,14 +20,16 @@ venue, created = Organization.objects.get_or_create(
is_venue=True is_venue=True
) )
scraper,item_count_start,virtcal = digitools.getScraper(venue, 'pdx') scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'pdx')
event_type = "Gv" event_type = "Gv"
# Time Signatures # Time Signatures
tz = pytz.timezone("US/Eastern") tz = pytz.timezone("US/Eastern")
DATETIME_FORMAT = '%B %d, %Y %I:%M %p' tz_str = " -0800 UTC"
DATETIME_FORMAT_2 = '%B %d, %Y'
DATETIME_FORMAT = '%B %d, %Y %I:%M %p %z %Z'
DATETIME_FORMAT_2 = '%B %d, %Y %z %Z'
def get_events(ps, event_type): def get_events(ps, event_type):
print("Getting events ...") print("Getting events ...")
@@ -44,9 +46,9 @@ def get_events(ps, event_type):
event['time'] = c.xpath('.//*/span[@class="pe-4"]/text()')[0].replace("\n", "").strip() event['time'] = c.xpath('.//*/span[@class="pe-4"]/text()')[0].replace("\n", "").strip()
# event['time2'] = c.xpath('.//*/span[@class="pe-4"]/text()') # event['time2'] = c.xpath('.//*/span[@class="pe-4"]/text()')
try: try:
event['dateStamp'] = datetime.strptime(event['date']+" "+event['time'], DATETIME_FORMAT) event['dateStamp'] = datetime.strptime(event['date']+" "+event['time'] + tz_str, DATETIME_FORMAT)
except: except:
event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT_2) event['dateStamp'] = datetime.strptime(event['date'] + tz_str, DATETIME_FORMAT_2)
# ppr(event) # ppr(event)

View File

@@ -20,13 +20,14 @@ venue, created = Organization.objects.get_or_create(
is_venue=True is_venue=True
) )
scraper,item_count_start,virtcal = digitools.getScraper(venue, 'phl') scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'phl')
event_type = "Gv" event_type = "Gv"
# Time Signatures # Time Signatures
tz = pytz.timezone("US/Eastern") tz = pytz.timezone("US/Eastern")
DATETIME_FORMAT = '%m/%d/%Y %I:%M %p' tz_str = " -0500 UTC"
DATETIME_FORMAT = '%m/%d/%Y %I:%M %p %z %Z'
DATETIME_FORMAT_2 = '%B %d, %Y' DATETIME_FORMAT_2 = '%B %d, %Y'
def get_events(ps, event_type): def get_events(ps, event_type):
@@ -41,7 +42,7 @@ def get_events(ps, event_type):
event['title'] = c.xpath('.//*/a/text()')[0] event['title'] = c.xpath('.//*/a/text()')[0]
event['time'] = c.xpath('.//*/span/text()')[2] event['time'] = c.xpath('.//*/span/text()')[2]
event['date'] = c.xpath('.//td[@class="rgSorted"]/text()')[0] event['date'] = c.xpath('.//td[@class="rgSorted"]/text()')[0]
event['dateStamp'] = datetime.strptime(event['date']+" "+event['time'], DATETIME_FORMAT) event['dateStamp'] = datetime.strptime(event['date']+" "+event['time'] + tz_str, DATETIME_FORMAT)
# ppr(event) # ppr(event)
digitools.createBasicEvent(event, event_type, venue) digitools.createBasicEvent(event, event_type, venue)
scraper.items+=1 scraper.items+=1

View File

@@ -0,0 +1,53 @@
# Scraper: pulls the "Yoga MDE" public Google Calendar (iCal feed) into the
# Medellin ('mde') calendar, then prunes stale / far-future events.
import requests, os, sys
from icalendar import Calendar as iCalendar, Event
from datetime import datetime, timedelta
from dateutil import relativedelta
td = relativedelta.relativedelta(hours=5)  # NOTE(review): unused below — confirm
from pprint import pprint as ppr
import pytz
sys.path.append('/var/www/digisnaxx.ado/scrapers')
import dtss
dtss.getReady()  # bootstraps the Django environment
from events.models import Event as DSEvent, Organization, Scraper, Calendar
import events.digitools as digitools

venue, created = Organization.objects.get_or_create(
    name="Yoga MDE",
    city="Medellin",
    website="https://digisnaxx.com/",
)
try:
    scraper, created = Scraper.objects.get_or_create(
        name="Yoga MDE",
        website="https://calendar.google.com/calendar/ical/721a61e06e9a96b7bd30cdb7439aa7791829c316bb8b77237ea93cf41d5396b4%40group.calendar.google.com/public/basic.ics",
        calendar = Calendar.objects.get(shortcode='mde'),
        items = 0,
        new_items = 0,
        last_ran = datetime.now(),
    )
except Exception as e:
    # get_or_create can raise (e.g. MultipleObjectsReturned); fall back to a
    # lookup by the venue's name.
    print(e)
    scraper = Scraper.objects.get(name=venue.name)
item_count_start = scraper.items
objIcalData = requests.get(scraper.website)
gcal = iCalendar.from_ical(objIcalData.text.strip())
counter = 0  # NOTE(review): unused after this point — confirm
digitools.getiCalRepeateEvents(gcal, scraper, venue, "Ex", "mde")
digitools.updateScraper(scraper, item_count_start)
# Prune: drop events older than yesterday or more than 45 days out.
# NOTE(review): these filters are not restricted to this venue — they delete
# matching events across ALL venues; confirm that is intended.
new_time = datetime.now() - timedelta(days=1)
right_bound_time = datetime.now() + timedelta(days=45)
events = DSEvent.objects.filter(show_date__lte=new_time)
events1 = DSEvent.objects.filter(show_date__gte=right_bound_time)
for e in events:
    e.delete()
for e in events1:
    e.delete()

View File

@@ -0,0 +1,55 @@
# Scraper intended for "idioki" (idioki.com) -> Medellin ('mde') calendar.
import requests, os, sys
from icalendar import Calendar as iCalendar, Event
from datetime import datetime, timedelta
from dateutil import relativedelta
td = relativedelta.relativedelta(hours=5)  # NOTE(review): unused below — confirm
from pprint import pprint as ppr
import pytz
sys.path.append('/var/www/digisnaxx.ado/scrapers')
import dtss
dtss.getReady()  # bootstraps the Django environment
from events.models import Event as DSEvent, Organization, Scraper, Calendar
import events.digitools as digitools
venue, created = Organization.objects.get_or_create(
    name="idioki",
    city="Medellin",
    website="https://idioki.com/",
)
try:
    # BUG(review): the scraper name is "Yoga MDE" (looks copy-pasted from the
    # Yoga MDE scraper) while this venue is "idioki", and website is empty —
    # requests.get(scraper.website) below cannot fetch a feed from "".
    # Confirm the intended scraper name and iCal URL.
    scraper, created = Scraper.objects.get_or_create(
        name="Yoga MDE",
        website="",
        calendar = Calendar.objects.get(shortcode='mde'),
        items = 0,
        new_items = 0,
        last_ran = datetime.now(),
    )
except Exception as e:
    print(e)
    # Fallback assumes a Scraper row named after the venue ("idioki") exists.
    scraper = Scraper.objects.get(name=venue.name)
event_type = "Mu"  # NOTE(review): unused — "Ed" is passed below; confirm
item_count_start = scraper.items
objIcalData = requests.get(scraper.website)
gcal = iCalendar.from_ical(objIcalData.text)
counter = 0  # NOTE(review): unused after this point — confirm
digitools.getiCalRepeateEvents(gcal, scraper, venue, "Ed", "mde")
digitools.updateScraper(scraper, item_count_start)
# Prune: drop events older than yesterday or more than 45 days out.
# NOTE(review): not restricted to this venue — deletes across ALL venues.
new_time = datetime.now() - timedelta(days=1)
right_bound_time = datetime.now() + timedelta(days=45)
events = DSEvent.objects.filter(show_date__lte=new_time)
events1 = DSEvent.objects.filter(show_date__gte=right_bound_time)
for e in events:
    e.delete()
for e in events1:
    e.delete()

View File

@@ -1,7 +1,7 @@
import requests, os, sys import requests, os, sys
from icalendar import Calendar as iCalendar, Event from icalendar import Calendar as iCalendar, Event
from datetime import datetime from datetime import datetime, timedelta
from dateutil import relativedelta from dateutil import relativedelta
td = relativedelta.relativedelta(hours=5) td = relativedelta.relativedelta(hours=5)
@@ -34,13 +34,11 @@ except Exception as e:
print(e) print(e)
scraper = Scraper.objects.get(name=venue.name) scraper = Scraper.objects.get(name=venue.name)
event_type = "Mu"
item_count_start = scraper.items item_count_start = scraper.items
objIcalData = requests.get(scraper.website) objIcalData = requests.get(scraper.website)
gcal = iCalendar.from_ical(objIcalData.text) gcal = iCalendar.from_ical(objIcalData.text)
counter = 0 counter = 0
digitools.getiCalRepeateEvents(gcal, scraper, venue, "Ed", cal) digitools.getiCalRepeateEvents(gcal, scraper, venue, "Ed", "mde")
digitools.updateScraper(scraper, item_count_start) digitools.updateScraper(scraper, item_count_start)
new_time = datetime.now() - timedelta(days=1) new_time = datetime.now() - timedelta(days=1)

View File

@@ -1,7 +1,7 @@
import requests, os, sys import requests, os, sys
from icalendar import Calendar as iCalendar, Event from icalendar import Calendar as iCalendar, Event
from datetime import datetime from datetime import datetime, timedelta
from dateutil import relativedelta from dateutil import relativedelta
td = relativedelta.relativedelta(hours=5) td = relativedelta.relativedelta(hours=5)
@@ -40,17 +40,18 @@ objIcalData = requests.get(scraper.website)
gcal = iCalendar.from_ical(objIcalData.text) gcal = iCalendar.from_ical(objIcalData.text)
counter = 0 counter = 0
print("Got Gcal")
digitools.getiCalRepeateEvents(gcal, scraper, venue, "Ed", cal) digitools.getiCalRepeateEvents(gcal, scraper, venue, "Ed", "000")
print("Got EVents")
digitools.updateScraper(scraper, item_count_start) digitools.updateScraper(scraper, item_count_start)
print("Updated")
# new_time = datetime.now() - timedelta(days=1)
# right_bound_time = datetime.now() + timedelta(days=45)
# events = DSEvent.objects.filter(show_date__lte=new_time)
# events1 = DSEvent.objects.filter(show_date__gte=right_bound_time)
new_time = datetime.now() - timedelta(days=1) # for e in events:
right_bound_time = datetime.now() + timedelta(days=45) # e.delete()
events = DSEvent.objects.filter(show_date__lte=new_time)
events1 = DSEvent.objects.filter(show_date__gte=right_bound_time)
for e in events: # for e in events1:
e.delete() # e.delete()
for e in events1:
e.delete()

View File

@@ -21,13 +21,19 @@ venue, created = Organization.objects.get_or_create(
website="https://chicityclerkelms.chicago.gov/Meetings/", website="https://chicityclerkelms.chicago.gov/Meetings/",
) )
website="https://calendar.google.com/calendar/ical/chicagolegislativereference%40gmail.com/public/basic.ics", website="https://calendar.google.com/calendar/ical/chicagolegislativereference%40gmail.com/public/basic.ics"
# website="https://calendar.google.com/calendar/ical/chicagolegislativereference%40gmail.com/public/basic.ics"
scraper,item_count_start,virtcal = digitools.getScraper(venue, website, 'chi') scraper,item_count_start,virtcal = digitools.getScraper(venue, website, 'chi')
print("Got Scraper")
event_type = "Gv" event_type = "Gv"
item_count_start = scraper.items item_count_start = scraper.items
objIcalData = requests.get(scraper.website)
objIcalData = requests.get(website)
print("Got Obj")
gcal = iCalendar.from_ical(objIcalData.text) gcal = iCalendar.from_ical(objIcalData.text)
print("Got Gcal")
events = digitools.getiCalEvents(gcal, scraper, venue, event_type) events = digitools.getiCalEvents(gcal, scraper, venue, event_type)
digitools.buildiCalEvents(events, event_type) print("Got Events")
digitools.buildiCalEvents(events, event_type, scraper, venue)
digitools.updateScraper(scraper, item_count_start) digitools.updateScraper(scraper, item_count_start)

View File

@@ -32,5 +32,5 @@ objIcalData = requests.get(scraper.website)
gcal = iCalendar.from_ical(objIcalData.text) gcal = iCalendar.from_ical(objIcalData.text)
events = digitools.getiCalEvents(gcal, scraper, venue, event_type) events = digitools.getiCalEvents(gcal, scraper, venue, event_type)
digitools.buildiCalEvents(events, event_type) digitools.buildiCalEvents(events, event_type, scraper, venue)
digitools.updateScraper(scraper, item_count_start) digitools.updateScraper(scraper, item_count_start)

View File

@@ -28,5 +28,5 @@ item_count_start = scraper.items
objIcalData = requests.get(scraper.website) objIcalData = requests.get(scraper.website)
gcal = iCalendar.from_ical(objIcalData.text) gcal = iCalendar.from_ical(objIcalData.text)
events = digitools.getiCalEvents(gcal, scraper, venue, event_type) events = digitools.getiCalEvents(gcal, scraper, venue, event_type)
digitools.buildiCalEvents(events, event_type) digitools.buildiCalEvents(events, event_type, scraper, venue)
digitools.updateScraper(scraper, item_count_start) digitools.updateScraper(scraper, item_count_start)

View File

@@ -28,5 +28,5 @@ item_count_start = scraper.items
objIcalData = requests.get(scraper.website) objIcalData = requests.get(scraper.website)
gcal = iCalendar.from_ical(objIcalData.text) gcal = iCalendar.from_ical(objIcalData.text)
events = digitools.getiCalEvents(gcal, scraper, venue, event_type) events = digitools.getiCalEvents(gcal, scraper, venue, event_type)
digitools.buildiCalEvents(events, event_type) digitools.buildiCalEvents(events, event_type, scraper, venue)
digitools.updateScraper(scraper, item_count_start) digitools.updateScraper(scraper, item_count_start)

View File

@@ -28,5 +28,5 @@ item_count_start = scraper.items
objIcalData = requests.get(scraper.website) objIcalData = requests.get(scraper.website)
gcal = iCalendar.from_ical(objIcalData.text) gcal = iCalendar.from_ical(objIcalData.text)
events = digitools.getiCalEvents(gcal, scraper, venue, "Mu") events = digitools.getiCalEvents(gcal, scraper, venue, "Mu")
digitools.buildiCalEvents(events, event_type) digitools.buildiCalEvents(events, event_type, scraper, venue)
digitools.updateScraper(scraper, item_count_start) digitools.updateScraper(scraper, item_count_start)

View File

@@ -29,5 +29,5 @@ item_count_start = scraper.items
objIcalData = requests.get(scraper.website) objIcalData = requests.get(scraper.website)
gcal = iCalendar.from_ical(objIcalData.text) gcal = iCalendar.from_ical(objIcalData.text)
events = digitools.getiCalEvents(gcal, scraper, venue, event_type) events = digitools.getiCalEvents(gcal, scraper, venue, event_type)
digitools.buildiCalEvents(events, event_type) digitools.buildiCalEvents(events, event_type, scraper, venue)
digitools.updateScraper(scraper, item_count_start) digitools.updateScraper(scraper, item_count_start)

View File

@@ -28,5 +28,5 @@ item_count_start = scraper.items
objIcalData = requests.get(scraper.website) objIcalData = requests.get(scraper.website)
gcal = iCalendar.from_ical(objIcalData.text) gcal = iCalendar.from_ical(objIcalData.text)
events = digitools.getiCalEvents(gcal, scraper, venue, event_type) events = digitools.getiCalEvents(gcal, scraper, venue, event_type)
digitools.buildiCalEvents(events, event_type) digitools.buildiCalEvents(events, event_type, scraper, venue)
digitools.updateScraper(scraper, item_count_start) digitools.updateScraper(scraper, item_count_start)

View File

@@ -28,6 +28,9 @@ scraper,item_count_start,virtcal = digitools.getScraper(venue, website, 'msp')
item_count_start = scraper.items item_count_start = scraper.items
objIcalData = requests.get(scraper.website) objIcalData = requests.get(scraper.website)
gcal = iCalendar.from_ical(objIcalData.content) gcal = iCalendar.from_ical(objIcalData.content)
print("Got gcal")
events = digitools.getiCalEvents(gcal, scraper, venue, event_type) events = digitools.getiCalEvents(gcal, scraper, venue, event_type)
digitools.buildiCalEvents(events, event_type) print("Got Events")
digitools.buildiCalEvents(events, event_type, scraper, venue)
print("Built Events")
digitools.updateScraper(scraper, item_count_start) digitools.updateScraper(scraper, item_count_start)

View File

@@ -0,0 +1,86 @@
# Scraper for Arena Wien (arena.wien) concert listings -> Vienna ('vie') calendar.
import os, sys
from datetime import datetime, timedelta
from dateutil import relativedelta
sys.path.append('/var/www/digisnaxx.ado/scrapers')
import dtss
dtss.getReady()  # bootstraps the Django environment
from time import sleep
from pprint import pprint as ppr
import pytz
from selenium.webdriver.common.by import By
from events.models import Organization, Scraper, Calendar, Event
import events.digitools as digitools

# Fixed CET offset appended to scraped times so strptime's %z %Z yields an
# aware datetime. NOTE(review): ignores DST (+0200 in summer).
tz_str = "+0100 UTC"
DATETIME_FORMAT = '%B %d %Y %H:%M %z %Z'
venue, created = Organization.objects.get_or_create(
    name="Arena Wien",
    city="Vienna",
    website="https://arena.wien/Home/Programm",
    is_venue = True
)
scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'vie')
# Recount from zero each run; updateScraper() persists the final tally.
scraper.items = 0
scraper.save()
def getSite(br, website):
    """Load *website* in the browser, scrape its events, then pause briefly."""
    page_source = digitools.getSource(br, website)
    get_events(page_source, "Mu")
    sleep(3)
def get_events(ps, event_type):
    """Extract one event per calendar row from the Arena Wien programme page."""
    rows = ps.xpath('.//*/div[@class="suite_calRowContainer "]')
    for row in rows:
        try:
            event = {}
            date_parts = row.xpath('.//*/span[@class="suite_datePlate"]/span/text()')
            time_parts = row.xpath('.//*/span[@class="col-md-2 suite_EvenTime"]/span/text()')
            start_time = [t.replace("\n", "").strip() for t in time_parts][2]
            day = date_parts[0].strip()
            # The third span holds "MM. | YYYY"; split into month and year.
            month = date_parts[2].split("|")[0].replace(".", "").strip()
            year = date_parts[2].split("|")[1].strip()
            month = digitools.translateMonth(month)
            event['date'] = " ".join([month, day, year, start_time, tz_str])
            event['scraper'] = scraper
            event['calendars'] = [scraper.calendar]
            event['title'] = row.xpath('.//*/span[@class="Event_H1"]/text()')[0]
            try:
                event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
            except Exception as parse_err:
                # Keep the event even when the date string fails to parse.
                print(parse_err)
            event['link'] = row.xpath('.//a/@href')[0]
            digitools.createBasicEvent(event, "Mu", venue)
            scraper.items += 1
        except Exception as err:
            # Best-effort loop: log and continue with the next row.
            print("\nError: ", err)
            ppr(event)
# Entry point: first CLI argument selects the browser / run environment.
if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    # Fix: the original called br.close() on this path, but `br` is never
    # assigned when no argument is given, so it raised NameError instead of
    # exiting cleanly.
    print("No run_env")
    quit()
month = datetime.now().month
year = datetime.now().year
odt_next_month = datetime.now() + relativedelta.relativedelta(months=1)
next_month = odt_next_month.month
# Scrape the current and the following month's programme pages.
website = "https://arena.wien/Home/Programm#data_abonnement=-1&data_month={}&data_year={}".format(str(month), str(year))
website_2 = "https://arena.wien/Home/Programm#data_abonnement=-1&data_month={}&data_year={}".format(str(next_month), str(year))
getSite(br,website)
getSite(br,website_2)
digitools.updateScraper(scraper, item_count_start)
br.close()

View File

@@ -0,0 +1,81 @@
# Scraper for the City of Vienna events search (wien.gv.at) -> 'vie' calendar.
import os, sys
from datetime import datetime, timedelta
from dateutil import relativedelta
sys.path.append('/var/www/digisnaxx.ado/scrapers')
import dtss
dtss.getReady()  # bootstraps the Django environment
from time import sleep
from pprint import pprint as ppr
import pytz
from selenium.webdriver.common.by import By
from events.models import Organization, Scraper, Calendar, Event
import events.digitools as digitools

# Fixed CET offset appended to scraped times for %z %Z parsing.
# NOTE(review): ignores DST (+0200 in summer).
tz_str = "+0100 UTC"
DATETIME_FORMAT = '%B %d %Y %H:%M %z %Z'
venue, created = Organization.objects.get_or_create(
    name="City Site",
    city="Vienna",
    website="https://www.wien.gv.at/",
    is_venue = True
)
scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'vie')
# Recount from zero each run; updateScraper() persists the final tally.
scraper.items = 0
scraper.save()
def getSite(br, website):
    """Load *website* in the browser, scrape its events, then pause briefly."""
    page_source = digitools.getSource(br, website)
    get_events(page_source, "Mu")
    sleep(3)
def get_events(ps, event_type):
    # Parse <wm-card> elements from the wien.gv.at event search results.
    #
    # BUG(review): the lines that would compute month/day/year/time are
    # commented out below, so the ''.join() call references undefined names
    # and raises NameError on every card — every row lands in the except
    # handler and no event is ever created. The date parsing needs to be
    # restored (or rewritten for this site's markup) before this works.
    contents = ps.xpath('.//*/wm-card')
    for c in contents:
        try:
            event = {}
            date = c.xpath('.//*/ul/li/text()')
            # time = c.xpath('.//*/span[@class="col-md-2 suite_EvenTime"]/span/text()')
            # time = [x.replace("\n", "").strip() for x in time][2]
            # day, month, year = date[0].strip(), date[2].split("|")[0].replace(".", "").strip(), date[2].split("|")[1].strip()
            # month = digitools.translateMonth(month)
            event['date'] = (' ').join([month, day, year, time, tz_str])
            event['scraper'] = scraper
            event['calendars'] = [scraper.calendar]
            event['title'] = c.xpath('.//*/h3/a/text()')[0]
            try:
                event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
            except Exception as e:
                print(e)
                pass
            event['link'] = c.xpath('.//a/@href')[0]
            digitools.createBasicEvent(event, "Mu", venue)
            scraper.items+=1
        except Exception as e:
            # Best-effort loop: log and continue with the next card.
            print("\nError: ", e)
            ppr(event)
            # print("\n+++\n")
            pass
# Entry point: first CLI argument selects the browser / run environment.
if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    # Fix: the original called br.close() on this path, but `br` is never
    # assigned when no argument is given, so it raised NameError instead of
    # exiting cleanly.
    print("No run_env")
    quit()
# NOTE(review): 'this month' is embedded unencoded in the query string —
# confirm the site accepts a literal space here.
site_append = "veranstaltungen/suche?q=&facet={}&page={}".format('this month', 1)
website = venue.website + site_append
getSite(br,website)
# getSite(br,website_2)
digitools.updateScraper(scraper, item_count_start)
br.close()

View File

@@ -0,0 +1,77 @@
# Scraper for Flex (flex.at) concert listings -> Vienna ('vie') calendar.
import os, sys
from datetime import datetime, timedelta
from dateutil import relativedelta
sys.path.append('/var/www/digisnaxx.ado/scrapers')
import dtss
dtss.getReady()  # bootstraps the Django environment
from time import sleep
from pprint import pprint as ppr
import pytz
from selenium.webdriver.common.by import By
from events.models import Organization, Scraper, Calendar, Event
import events.digitools as digitools

# Fixed CET offset appended to scraped times for %z %Z parsing.
# NOTE(review): ignores DST (+0200 in summer).
tz_str = "+0100 UTC"
DATETIME_FORMAT = '%b %d %Y %H:%M %z %Z'
venue, created = Organization.objects.get_or_create(
    name="Flex",
    city="Vienna",
    website="https://flex.at/",
    is_venue = True
)
scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'vie')
# Recount from zero each run; updateScraper() persists the final tally.
scraper.items = 0
scraper.save()
def getSite(br, website):
    """Load *website* in the browser, scrape its events, then pause briefly."""
    page_source = digitools.getSource(br, website)
    get_events(page_source, "Mu")
    sleep(3)
def get_events(ps, event_type):
contents = ps.xpath('.//*/div[@class="ectbe-inner-wrapper ectbe-simple-event "]')
for c in contents:
try:
event = {}
date = c.xpath('.//div[@class="ectbe-date-wrp elementor-repeater-item-dd81c19"]/span/text()')
time = c.xpath('.//*/div[@class="ectbe-evt-time elementor-repeater-item-fd61cbc"]/text()')[0].split('-')[0].strip()
year = datetime.now().year
if date[1] == 'März':
date[1] = "Mar"
event['date'] = (' ').join([date[1].replace(".", ""), date[0], str(year), time, tz_str])
event['scraper'] = scraper
event['calendars'] = [scraper.calendar]
event['title'] = c.xpath('.//*/h2/text()')[0]
try:
event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
except Exception as e:
print(e)
pass
event['link'] = c.xpath('.//a[@class="ectbe-evt-read-more"]/@href')[0]
digitools.createBasicEvent(event, "Mu", venue)
scraper.items+=1
# ppr(event)
except Exception as e:
print("\nError: ", e)
ppr(event)
# print("\n+++\n")
pass
# Entry point: first CLI argument selects the browser / run environment.
if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    # Fix: the original called br.close() on this path, but `br` is never
    # assigned when no argument is given, so it raised NameError instead of
    # exiting cleanly.
    print("No run_env")
    quit()
getSite(br, venue.website)
digitools.updateScraper(scraper, item_count_start)
br.close()

View File

@@ -0,0 +1,66 @@
# Scraper for the Empty Bottle (emptybottle.com) -> Chicago ('chi') calendar.
import os, sys
from datetime import datetime
from dateutil import relativedelta
sys.path.append('/var/www/digisnaxx.ado/scrapers')
import dtss
dtss.getReady()  # bootstraps the Django environment
from time import sleep
from pprint import pprint as ppr
import pytz
from events.models import Organization, Scraper, Calendar, Event
import events.digitools as digitools

# Fixed Central offset appended to scraped times for %z %Z parsing.
# NOTE(review): -0600 is CST; ignores DST shifts.
tz_str = "-0600 UTC"
DATETIME_FORMAT = '%a %B %d %Y %I:%M%p %z %Z'
venue, created = Organization.objects.get_or_create(
    name="Empty Bottle",
    city="Chicago",
    website="https://www.emptybottle.com/",
    is_venue = True
)
scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'chi')
# Recount from zero each run; updateScraper() persists the final tally.
scraper.items = 0
scraper.save()
def get_events(ps, event_type):
    """Scrape emptybottle.com listings; store only shows hosted at the Empty Bottle."""
    items = ps.xpath('.//*/div[@class="eb-item"]')
    for item in items:
        try:
            event = {}
            show_date = item.xpath('.//*/div[@class="date"]/text()')[0]
            start_time = item.xpath('.//*/div[@class="start-time"]/text()')[0]
            # The listing omits the year; assume the current one.
            year = datetime.now().year
            event['date'] = " ".join([show_date, str(year), start_time, tz_str])
            event['scraper'] = scraper
            event['calendars'] = [scraper.calendar]
            event['title'] = item.xpath('.//*/div[@class="title"]/text()')[0]
            event['venue'] = item.xpath('.//*/a[@class="venue"]/text()')[0]
            event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
            event['link'] = venue.website
            # The page also lists shows at sister venues; keep house shows only.
            if event['venue'] == 'Empty Bottle':
                digitools.createBasicEvent(event, "Mu", venue)
                scraper.items += 1
        except Exception as err:
            # Best-effort loop: log and continue with the next listing.
            print("\nError: ", err)
# Entry point: first CLI argument selects the browser / run environment.
if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    # Fix: the original called br.close() on this path, but `br` is never
    # assigned when no argument is given, so it raised NameError instead of
    # exiting cleanly.
    print("No run_env")
    quit()
ps = digitools.getSource(br, venue.website)
get_events(ps, "Mu")
sleep(3)
digitools.updateScraper(scraper, item_count_start)
br.close()

View File

@@ -0,0 +1,68 @@
# Scraper for the Hideout (hideoutchicago.com) -> Chicago ('chi') calendar.
import os, sys
from datetime import datetime
from dateutil import relativedelta
sys.path.append('/var/www/digisnaxx.ado/scrapers')
import dtss
dtss.getReady()  # bootstraps the Django environment
from time import sleep
from pprint import pprint as ppr
import pytz
from events.models import Organization, Scraper, Calendar, Event
import events.digitools as digitools

tz = pytz.timezone("US/Central")
# Fixed Central offset appended to scraped times for %z %Z parsing.
# NOTE(review): -0600 is CST; ignores DST shifts.
tz_str = "-0600 UTC"
# Primary format has minutes ("7:00pm"); fallback covers hour-only ("7 pm").
DATETIME_FORMAT = '%a, %b %d, %Y %I:%M%p %z %Z'
DATETIME_FORMAT_2 = '%a, %b %d, %Y %I %p %z %Z'
venue, created = Organization.objects.get_or_create(
    name="Hideout - Chicago",
    city="Chicago",
    website="https://hideoutchicago.com/",
    is_venue = True
)
scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'chi')
# Recount from zero each run; updateScraper() persists the final tally.
scraper.items = 0
scraper.save()
def get_events(ps, event_type):
    """Parse Hideout listing cards from *ps* and store each as a music event.

    event_type is unused; the saved category is the hard-coded "Mu", matching
    the other Chicago scrapers.
    """
    cards = ps.xpath('.//*/div[@class="col-12 eventWrapper rhpSingleEvent py-4 px-0 rhp-event__single-event--list"]')
    for card in cards:
        try:
            day_text = card.xpath('.//*/div[@id="eventDate"]/text()')[0].replace("\n", "").replace("\t", "")
            door_text = card.xpath('.//*/span[@class="font0by75 fontWeight500 lineHeight15 rhp-event__time-text--list"]/text()')[0].split("//")[1].replace("Doors: ", "").replace("\n", "").replace("\t", "")
            event = {
                'date': ' '.join([day_text, door_text, tz_str]),
                'scraper': scraper,
                'calendars': [scraper.calendar],
                'title': card.xpath('.//*/a[@id="eventTitle"]/h2/text()')[0].replace("\n", "").replace("\t", ""),
            }
            try:
                # Times usually include minutes; fall back to hour-only form.
                event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
            except:
                event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT_2)
            event['link'] = card.xpath('.//*/a[@id="eventTitle"]/@href')[0]
            digitools.createBasicEvent(event, "Mu", venue)
            scraper.items += 1
        except Exception as e:
            print("\nError: ", e)
# --- Entry point: expects the run environment as the first CLI argument. ---
if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    # Fix: do not call br.close() here -- `br` is only bound in the branch
    # above, so the original raised NameError instead of exiting cleanly.
    print("No run_env")
    quit()
ps = digitools.getSource(br, venue.website)
get_events(ps, "Mu")
sleep(3)
digitools.updateScraper(scraper, item_count_start)
br.close()

View File

@@ -0,0 +1,76 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta
sys.path.append('/var/www/digisnaxx.ado/scrapers')
import dtss
dtss.getReady()
from time import sleep
from pprint import pprint as ppr
import pytz
from events.models import Organization, Scraper, Calendar, Event
import events.digitools as digitools
# Fixed CST offset appended to scraped dates so strptime yields aware
# datetimes. NOTE(review): "-0600" ignores daylight saving -- confirm.
tz = pytz.timezone("US/Central")
tz_str = "-0600 UTC"
# Cards expose an ISO-style date via the <time datetime=...> attribute.
DATETIME_FORMAT = '%Y-%m-%d %I:%M %p %z %Z'
# DATETIME_FORMAT_2 = '%a, %b %d, %Y %I %p %z %Z'
# Ensure the venue row exists before attaching a scraper to it.
venue, created = Organization.objects.get_or_create(
    name="Reggies",
    city="Chicago",
    website="https://www.reggieslive.com/",
    is_venue = True
)
# Attach this run to the Chicago ('chi') calendar and reset the item counter.
scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'chi')
scraper.items = 0
scraper.save()
def get_events(ps, event_type):
    """Scrape Reggies <article> cards and save each as an event.

    Category dispatch keys off the "presented by" line: comedy -> "Co";
    game/bingo/sports nights -> "Co" (NOTE(review): this branch looks like it
    was meant to be a distinct category -- confirm before changing);
    everything else -> "Mu". event_type is unused, like the sibling scrapers.

    Fix: scraper.items was never incremented here, so every run reported zero
    items to digitools.updateScraper. The counter now tracks saved events,
    consistent with the other scrapers in this commit.
    """
    contents = ps.xpath('.//*/article')
    for c in contents:
        try:
            event = {}
            date = c.xpath('.//*/time/@datetime')[0]
            time = c.xpath('.//*/li[@class="first"]/text()')[0]
            presented_by = c.xpath('.//*/p[@class="presented-by"]/text()')[0]
            event['date'] = (' ').join([date, time, tz_str])
            event['scraper'] = scraper
            event['calendars'] = [scraper.calendar]
            event['title'] = c.xpath('.//*/hgroup/h2/text()')[0]
            try:
                event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
            except:
                # Some rows prefix the time with "Doors"; strip it and retry.
                event['date'] = (' ').join([date, time.replace("Doors", "").strip(), tz_str])
                event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
            event['link'] = venue.website
            if "omedy" in presented_by:
                digitools.createBasicEvent(event, "Co", venue)
            elif any(word in presented_by for word in ["game", "bingo", "Sports"]):
                digitools.createBasicEvent(event, "Co", venue)
            else:
                digitools.createBasicEvent(event, "Mu", venue)
            scraper.items += 1
        except Exception as e:
            print("\nError: ", e)
            pass
# --- Entry point: expects the run environment as the first CLI argument. ---
if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    # Fix: do not call br.close() here -- `br` is only bound in the branch
    # above, so the original raised NameError instead of exiting cleanly.
    print("No run_env")
    quit()
# (Removed unused `themes = []` left over from an earlier draft.)
ps = digitools.getSource(br, venue.website)
get_events(ps, "Mu")
sleep(1)
digitools.updateScraper(scraper, item_count_start)
br.close()

View File

@@ -0,0 +1,73 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta
sys.path.append('/var/www/digisnaxx.ado/scrapers')
import dtss
dtss.getReady()
from time import sleep
from pprint import pprint as ppr
import pytz
from events.models import Organization, Scraper, Calendar, Event
import events.digitools as digitools
# Fixed CST offset appended to scraped dates so strptime yields aware
# datetimes. NOTE(review): "-0600" ignores daylight saving -- confirm.
tz = pytz.timezone("US/Central")
tz_str = "-0600 UTC"
DATETIME_FORMAT = '%a %b %d %Y %I:%M %p %z %Z'
# DATETIME_FORMAT_2 = '%a, %b %d, %Y %I %p %z %Z'
# Ensure the venue row exists before attaching a scraper to it.
venue, created = Organization.objects.get_or_create(
    name="Subterranean",
    city="Chicago",
    website="https://subt.net/",
    is_venue = True
)
# Attach this run to the Chicago ('chi') calendar and reset the item counter.
scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'chi')
scraper.items = 0
scraper.save()
def get_events(ps, event_type):
    """Scrape Subterranean event cards and save them as music events.

    event_type is unused; the saved category is the hard-coded "Mu".

    Fix: the original only set event['support'] when the supporting-talent
    xpath matched (the inner except just passed), so for shows without an
    opener `len(event['support'])` raised KeyError and the outer handler
    silently dropped the whole event. An empty-string default keeps them.
    """
    contents = ps.xpath('.//*/div[@class="event-info-block"]')
    for c in contents:
        try:
            event = {}
            date = c.xpath('.//p[@class="fs-18 bold mt-1r event-date"]/text()')[0]
            time = c.xpath('.//*/span[@class="door-time"]/text()')[0].replace("Doors: ", "").replace("/", "").strip()
            # The listing omits the year; assume the current one.
            year = datetime.now().year
            event['date'] = (' ').join([date, str(year), time, tz_str])
            event['scraper'] = scraper
            event['calendars'] = [scraper.calendar]
            event['title'] = c.xpath('.//p[@class="fs-18 bold mb-12 event-title"]/a/text()')[0]
            try:
                event['support'] = c.xpath('.//p[@class="fs-12 supporting-talent"]/text()')[0]
            except:
                event['support'] = ""  # no opener listed for this show
            event['venue'] = c.xpath('.//p[@class="fs-12 venue"]/text()')[0]
            event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
            event['link'] = venue.website
            if len(event['support']) > 0:
                event['new_title'] = event['title'] + " w/ " + event['support']
            digitools.createBasicEvent(event, "Mu", venue)
            scraper.items+=1
        except Exception as e:
            print("\nError: ", e)
            pass
# --- Entry point: expects the run environment as the first CLI argument. ---
if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    # Fix: do not call br.close() here -- `br` is only bound in the branch
    # above, so the original raised NameError instead of exiting cleanly.
    print("No run_env")
    quit()
ps = digitools.getSource(br, venue.website)
get_events(ps, "Mu")
sleep(3)
digitools.updateScraper(scraper, item_count_start)
br.close()

View File

@@ -0,0 +1,67 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta
sys.path.append('/var/www/digisnaxx.ado/scrapers')
import dtss
dtss.getReady()
from time import sleep
from pprint import pprint as ppr
import pytz
from events.models import Organization, Scraper, Calendar, Event
import events.digitools as digitools
# Fixed CST offset appended to scraped dates so strptime yields aware
# datetimes. NOTE(review): "-0600" ignores daylight saving -- confirm.
tz_str = "-0600 UTC"
DATETIME_FORMAT = '%a %B %d %Y %I:%M%p %z %Z'
# Ensure the venue row exists before attaching a scraper to it.
venue, created = Organization.objects.get_or_create(
    name="Thalia Hall",
    city="Chicago",
    website="https://www.thaliahallchicago.com/",
    is_venue = True
)
# Attach this run to the Chicago ('chi') calendar and reset the item counter.
scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'chi')
scraper.items = 0
scraper.save()
def get_events(ps, event_type):
    """Parse Thalia Hall listing cards and persist in-house shows.

    event_type is unused; the saved category is the hard-coded "Mu". Cards
    whose venue label is not 'Thalia Hall' are skipped.
    """
    for card in ps.xpath('.//*/div[@class="eb-item"]'):
        try:
            day_text = card.xpath('.//*/div[@class="date"]/text()')[0]
            start = card.xpath('.//*/div[@class="start-time"]/text()')[0].replace("Doors: ", "")
            # The listing omits the year, so assume the current one.
            stamp_text = ' '.join([day_text, str(datetime.now().year), start, tz_str])
            event = {
                'date': stamp_text,
                'scraper': scraper,
                'calendars': [scraper.calendar],
                'title': card.xpath('.//*/div[@class="title"]/text()')[0],
                'venue': card.xpath('.//*/a[@class="venue"]/text()')[0],
                'dateStamp': datetime.strptime(stamp_text, DATETIME_FORMAT),
                'link': venue.website,
            }
            if event['venue'] == 'Thalia Hall':
                digitools.createBasicEvent(event, "Mu", venue)
                scraper.items += 1
        except Exception as e:
            print("\nError: ", e)
# --- Entry point: expects the run environment as the first CLI argument. ---
if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    # Fix: do not call br.close() here -- `br` is only bound in the branch
    # above, so the original raised NameError instead of exiting cleanly.
    print("No run_env")
    quit()
ps = digitools.getSource(br, venue.website)
get_events(ps, "Mu")
sleep(3)
digitools.updateScraper(scraper, item_count_start)
br.close()

View File

@@ -0,0 +1,65 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta
sys.path.append('/var/www/digisnaxx.ado/scrapers')
import dtss
dtss.getReady()
from time import sleep
from pprint import pprint as ppr
import pytz
from events.models import Organization, Scraper, Calendar, Event
import events.digitools as digitools
# Fixed EST offset appended to scraped dates so strptime yields aware
# datetimes. NOTE(review): "-0500" ignores daylight saving -- confirm.
tz_str = "-0500 UTC"
DATETIME_FORMAT = '%A, %B %d, %Y %I:%M %p %z %Z'
# Ensure the venue row exists before attaching a scraper to it.
venue, created = Organization.objects.get_or_create(
    name="Asylum Comedy",
    city="Boston",
    website="https://calendar.improvasylum.com/",
    is_venue = True
)
# Attach this run to the Boston ('bos') calendar and reset the item counter.
scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'bos')
scraper.items = 0
scraper.save()
def get_events(ps, event_type):
    """Scrape Improv Asylum calendar cards and save each as a comedy event.

    event_type is unused; the saved category is the hard-coded "Co".

    Fix: the success log printed "Successn" (stray trailing 'n' from a
    mistyped newline escape); corrected to "Success".
    """
    contents = ps.xpath('.//*/div[@class="space-y-4"]')
    for c in contents:
        try:
            event = {}
            date = c.xpath('.//*/h3[@class="text-lg font-extrabold uppercase"]/text()')[0]
            time = c.xpath('.//*/p[@class="flex items-center gap-2"]/text()')[0].split("-")[0]
            # venue_str is unused downstream, but indexing [1] doubles as a
            # structural check that the card has the expected two info rows.
            venue_str = c.xpath('.//*/p[@class="flex items-center gap-2"]/text()')[1]
            event['scraper'] = scraper
            event['calendars'] = [scraper.calendar]
            event['title'] = c.xpath('.//*/h4[@class="text-lg font-extrabold transition-colors"]/text()')[0]
            event['date'] = [date, time, tz_str]
            event['date'] = " ".join(event['date'])
            event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
            event['link'] = venue.website
            digitools.createBasicEvent(event, "Co", venue)
            scraper.items+=1
            print("\nSuccess")
        except Exception as e:
            print("\nError: ", e)
            pass
# --- Entry point: expects the run environment as the first CLI argument. ---
if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    # Fix: do not call br.close() here -- `br` is only bound in the branch
    # above, so the original raised NameError instead of exiting cleanly.
    print("No run_env")
    quit()
ps = digitools.getSource(br, venue.website)
get_events(ps, "Co")
sleep(3)
digitools.updateScraper(scraper, item_count_start)
br.close()

View File

@@ -0,0 +1,68 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta
sys.path.append('/var/www/digisnaxx.ado/scrapers')
import dtss
dtss.getReady()
from time import sleep
from pprint import pprint as ppr
import pytz
from events.models import Organization, Scraper, Calendar, Event
import events.digitools as digitools
# Fixed EST offset appended to scraped dates so strptime yields aware
# datetimes. NOTE(review): "-0500" ignores daylight saving -- confirm.
tz_str = "-0500 UTC"
DATETIME_FORMAT = '%A, %B %d %Y %I:%M %p %z %Z'
# Ensure the venue row exists before attaching a scraper to it.
venue, created = Organization.objects.get_or_create(
    name="JP Centre Yoga",
    city="Boston",
    website="https://www.jpcentreyoga.com/yoga-class-schedule",
    is_venue = True
)
# Attach this run to the Boston ('bos') calendar and reset the item counter.
scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'bos')
scraper.items = 0
scraper.save()
def get_events(ps, event_type):
    """Scrape the class-schedule widget: one day container holds many sessions.

    event_type is unused; classes are saved with the hard-coded "Ex" category.
    """
    # Outer loop: one div per day; the day's date header is shared by every
    # session inside it.
    contents = ps.xpath('.//*/div[@class="bw-widget__day"]')
    for cn in contents:
        date = cn.xpath('.//div[1]/text()')[0]
        events = cn.xpath('.//div[@class="bw-session"]')
        for c in events:
            try:
                event = {}
                time = c.xpath('.//*/time[@class="hc_starttime"]/text()')[0]
                event['scraper'] = scraper
                event['calendars'] = [scraper.calendar]
                event['title'] = c.xpath('.//*/div[@class="bw-session__name"]/text()')[1].replace("\n", "").strip()
                # Widget omits the year; assume the current one.
                year = datetime.now().year
                event['date'] = [date, str(year), time, tz_str]
                event['date'] = " ".join(event['date'])
                event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
                event['link'] = venue.website
                # NOTE: `event` is rebound here from the input dict to the
                # saved model instance returned by createBasicEvent.
                event, created = digitools.createBasicEvent(event, "Ex", venue)
                # Spaced-out "o n l i n e" in the title marks virtual classes;
                # those are also added to the "000" calendar.
                if "o n l i n e" in event.show_title:
                    event = digitools.add_calendar(event, "000")
                scraper.items+=1
            except Exception as e:
                print("\nError: ", e)
                pass
# --- Entry point: expects the run environment as the first CLI argument. ---
if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    # Fix: do not call br.close() here -- `br` is only bound in the branch
    # above, so the original raised NameError instead of exiting cleanly.
    print("No run_env")
    quit()
ps = digitools.getSource(br, venue.website)
# NOTE(review): "Co" is passed but get_events ignores event_type and saves
# "Ex" -- confirm which category is intended.
get_events(ps, "Co")
sleep(3)
digitools.updateScraper(scraper, item_count_start)
br.close()

View File

@@ -0,0 +1,70 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta
sys.path.append('/var/www/digisnaxx.ado/scrapers')
import dtss
dtss.getReady()
from time import sleep
from pprint import pprint as ppr
import pytz
from events.models import Organization, Scraper, Calendar, Event
import events.digitools as digitools
# Fixed EST offset appended to scraped dates so strptime yields aware
# datetimes. NOTE(review): "-0500" ignores daylight saving -- confirm.
tz_str = "-0500 UTC"
DATETIME_FORMAT = '%m.%d %Y %I:%M%p %z %Z'
# Ensure the venue row exists before attaching a scraper to it.
venue, created = Organization.objects.get_or_create(
    name="The Middle East",
    city="Boston",
    website="https://mideastclub.com/",
    is_venue = True
)
# Attach this run to the Boston ('bos') calendar and reset the item counter.
scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'bos')
scraper.items = 0
scraper.save()
def get_events(ps, event_type):
    """Harvest listing rows and save shows hosted at The Middle East.

    event_type is unused; events are stored with the "Mu" category. Rows for
    sibling rooms (per the tw-venue-name span) are skipped.
    """
    rows = ps.xpath('.//*/div[@class="tw-section"]/div[@class="row"]')
    for row in rows:
        try:
            day = row.xpath('.//*/span[@class="tw-event-date"]/text()')[0]
            showtime = row.xpath('.//*/span[@class="tw-event-time"]/text()')[0].replace("Show: ", "")
            # The listing omits the year; assume the current one.
            stamp_text = " ".join([day, str(datetime.now().year), showtime, tz_str])
            event = {
                'scraper': scraper,
                'calendars': [scraper.calendar],
                'title': row.xpath('.//*/div[@class="tw-name"]/a/text()')[0],
                'details': row.xpath('.//*/span[@class="tw-venue-name"]/text()')[0].replace("\n", "").strip(),
                'date': stamp_text,
                'dateStamp': datetime.strptime(stamp_text, DATETIME_FORMAT),
                'link': row.xpath('.//*/div[@class="tw-info-price-buy-tix"]/a/@href')[0],
            }
            if "Middle East" in event['details']:
                digitools.createBasicEvent(event, "Mu", venue)
                scraper.items += 1
                ppr(event)
                print("\n+++\n")
        except Exception as e:
            print("\nError: ", e)
# --- Entry point: expects the run environment as the first CLI argument. ---
if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    # Fix: do not call br.close() here -- `br` is only bound in the branch
    # above, so the original raised NameError instead of exiting cleanly.
    print("No run_env")
    quit()
# Paginated calendar: walk the first four listing pages.
links = ["https://mideastclub.com/page/1/", "https://mideastclub.com/page/2/", "https://mideastclub.com/page/3/", "https://mideastclub.com/page/4/"]
for link in links:
    ps = digitools.getSource(br, link)
    # NOTE(review): "Co" is passed but get_events ignores event_type and
    # saves "Mu" -- confirm which category is intended.
    get_events(ps, "Co")
digitools.updateScraper(scraper, item_count_start)
br.close()

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,5 +1,5 @@
import os, sys import os, sys
from datetime import datetime from datetime import datetime, timedelta
from dateutil import relativedelta from dateutil import relativedelta
import json import json
@@ -12,9 +12,10 @@ dtss.getReady()
from time import sleep from time import sleep
from pprint import pprint as ppr from pprint import pprint as ppr
from zoneinfo import ZoneInfo
import pytz import pytz
from events.models import Organization, Scraper, Calendar from events.models import Event, Organization, Scraper, Calendar
import events.digitools as digitools import events.digitools as digitools
venue, created = Organization.objects.get_or_create( venue, created = Organization.objects.get_or_create(
@@ -25,9 +26,14 @@ venue, created = Organization.objects.get_or_create(
) )
scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'mde') scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'mde')
tz_str = " -0500 UTC"
DATETIME_FORMAT = '%d/%m/%y %I:%M %p %z %Z'
# DATETIME_FORMAT = '%B %d %Y %I:%M%p %z %Z'
# DATETIME_FORMAT = '%B %d %Y %I:%M %p %z %Z'
# DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y %z %Z'
DATETIME_FORMAT = '%B %d %Y %I:%M%p' td = timedelta(days=31)
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y' future_date = datetime.now(ZoneInfo("America/Chicago")) + td
# with open('data.json') as f: # with open('data.json') as f:
# totalLinks = json.load(f) # totalLinks = json.load(f)
@@ -52,18 +58,18 @@ def getLinks(br, url, links):
# newlinks = ps.xpath('.//*/div[@class="e-con-inner"]/*/a/@href') # newlinks = ps.xpath('.//*/div[@class="e-con-inner"]/*/a/@href')
events = ps.xpath('.//*/div[@class="e-con-inner"]') events = ps.xpath('.//*/div[@class="e-con-inner"]')
for event in events: for event in events:
e = {} ev = {}
try: try:
e['link'] = event.xpath('.//*/a/@href')[0] ev['link'] = event.xpath('.//*/a/@href')[0]
e['title'] = event.xpath('.//*/h3/a/text()')[0] ev['title'] = event.xpath('.//*/h3/a/text()')[0]
e['venue'] = event.xpath('.//*/ul/li/a/text()')[-1:][0].replace('\n', '').replace('\t', '') ev['venue'] = event.xpath('.//*/ul/li/a/text()')[-1:][0].replace('\n', '').replace('\t', '')
# e['venue'] = event.xpath('.//*/ul/li/a/text()')[-1:][0] # e['venue'] = event.xpath('.//*/ul/li/a/text()')[-1:][0]
e['venueLink'] = event.xpath('.//*/ul/li/a/@href')[1] ev['venueLink'] = event.xpath('.//*/ul/li/a/@href')[1]
label= event.xpath('.//*/li[@class="elementor-repeater-item-46edd7d flexify ts-action"]/div/text()') label= event.xpath('.//*/li[@class="elementor-repeater-item-46edd7d flexify ts-action"]/div/text()')
e['label'] = ''.join([x.replace('\t', '').replace('\n', '') for x in label]).strip() ev['label'] = ''.join([x.replace('\t', '').replace('\n', '') for x in label]).strip()
newLinks.append(e) newLinks.append(ev)
except Exception as e: except Exception as e:
print("Error: ", e) print("Error: ", ev, e)
links = links + newLinks links = links + newLinks
return links return links
@@ -89,10 +95,16 @@ ppr(len(totalLinks))
for event in totalLinks: for event in totalLinks:
br.get(event['link']) br.get(event['link'])
sleep(2) sleep(1)
ps = html.fromstring(br.page_source) ps = html.fromstring(br.page_source)
dateTime= ps.xpath('.//*/div[@class="elementor-element elementor-element-d9beb21 elementor-widget elementor-widget-heading"]/span[@class="elementor-heading-title elementor-size-default"]/text()') dateTime= ps.xpath('.//*/div[@class="elementor-element elementor-element-d9beb21 elementor-widget elementor-widget-heading"]/span[@class="elementor-heading-title elementor-size-default"]/text()')
event['dateTime'] = [x[3:].split('-')[0].strip() for x in dateTime] event['dateTime'] = [x[3:].split('-')[0].strip() for x in dateTime]
try:
event['dateStamp'] = datetime.strptime(event['dateTime'][0] +tz_str, DATETIME_FORMAT)
if future_date <= event['dateStamp']:
print("Future Date")
pass
else:
locations = ps.xpath('.//*/div[@class="elementor-element elementor-element-f04aae3 elementor-widget__width-initial elementor-widget-mobile__width-initial elementor-widget elementor-widget-ts-advanced-list"]/*/li[@class="elementor-repeater-item-138dbed flexify ts-action"]/a/text()') locations = ps.xpath('.//*/div[@class="elementor-element elementor-element-f04aae3 elementor-widget__width-initial elementor-widget-mobile__width-initial elementor-widget elementor-widget-ts-advanced-list"]/*/li[@class="elementor-repeater-item-138dbed flexify ts-action"]/a/text()')
location = [x.replace('\t', '').replace('\n', '') for x in locations] location = [x.replace('\t', '').replace('\n', '') for x in locations]
if len(location) == 2: if len(location) == 2:
@@ -107,17 +119,23 @@ for event in totalLinks:
event['address'] = [x for x in address if 'Capacidad' not in x and '$' not in x][0] event['address'] = [x for x in address if 'Capacidad' not in x and '$' not in x][0]
except: except:
event['address'] = address event['address'] = address
except Exception as e:
print("Error: ", event, e)
pass
# ppr(event) # ppr(event)
sleep(2) sleep(1)
br.close() br.close()
data = totalLinks data = [i for i in totalLinks if 'dateStamp' in i]
new_data = [i for i in data if 'location' in i]
print("Set:", len(totalLinks)) print("Set:", len(totalLinks))
print("Data Set:", len(data))
print("New Data Set:", len(new_data))
paisa = [] paisa = []
for d in data: for d in new_data:
if len(d['dateTime']) != 0: if len(d['dateTime']) != 0:
if 'Familia' in d['label']: if 'Familia' in d['label']:
d['category'] = 'Ot' d['category'] = 'Ot'
@@ -139,7 +157,6 @@ for d in data:
cal = Calendar.objects.get(shortcode='mde') cal = Calendar.objects.get(shortcode='mde')
for d in paisa: for d in paisa:
d['dateStamp'] =datetime.strptime(d['dateTime'][0], DATETIME_FORMAT)
try: try:
nvenue, created = Organization.objects.get_or_create( nvenue, created = Organization.objects.get_or_create(
name=d['venue'], name=d['venue'],
@@ -152,6 +169,7 @@ for d in paisa:
nvenue = Organization.objects.get(name=d['venue']) nvenue = Organization.objects.get(name=d['venue'])
nvenue.address_complete = d['address'] nvenue.address_complete = d['address']
nvenue.save() nvenue.save()
new_event, created = Event.objects.update_or_create( new_event, created = Event.objects.update_or_create(
event_type = d['category'], event_type = d['category'],
show_title = d['title'], show_title = d['title'],
@@ -163,6 +181,6 @@ for d in paisa:
) )
new_event.calendar.add(cal) new_event.calendar.add(cal)
new_event.save() new_event.save()
print(new_event) # print("Success:", new_event)
digitools.updateScraper(scraper, item_count_start) digitools.updateScraper(scraper, item_count_start)

View File

@@ -13,9 +13,8 @@ import pytz
from events.models import Organization, Scraper, Calendar, Event from events.models import Organization, Scraper, Calendar, Event
import events.digitools as digitools import events.digitools as digitools
tz = pytz.timezone("US/Central") tz_str = " -0600 UTC"
DATETIME_FORMAT = '%b %d %Y %I:%M %p' DATETIME_FORMAT = '%b %d %Y %I:%M %p %z %Z'
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'
venue, created = Organization.objects.get_or_create( venue, created = Organization.objects.get_or_create(
name="Acme Comedy Club", name="Acme Comedy Club",
@@ -43,14 +42,13 @@ def get_events(ps, event_type):
event['title'] = c.xpath('.//*/span[@class="event_title"]/a/text()')[0] event['title'] = c.xpath('.//*/span[@class="event_title"]/a/text()')[0]
event['date'] = [month, day, str(year), c.xpath('.//*/span[@class="event_time"]/text()')[0].strip()] event['date'] = [month, day, str(year), c.xpath('.//*/span[@class="event_time"]/text()')[0].strip()]
event['date'] = " ".join(event['date']) event['date'] = " ".join(event['date'])
event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT) event['dateStamp'] = datetime.strptime(event['date'] + tz_str, DATETIME_FORMAT)
event['link'] = c.xpath('.//*/span[@class="event_title"]/a/@href')[0] event['link'] = c.xpath('.//*/span[@class="event_title"]/a/@href')[0]
digitools.createBasicEvent(event, "Co", venue) digitools.createBasicEvent(event, "Co", venue)
scraper.items+=1 scraper.items+=1
except Exception as e: except Exception as e:
print(e) print(e)
ppr(event) pass
print("\n\n+++\n\n")
if len(sys.argv) >= 2: if len(sys.argv) >= 2:
arg1 = sys.argv[1] arg1 = sys.argv[1]

View File

@@ -13,7 +13,6 @@ import pytz
from events.models import Organization, Scraper from events.models import Organization, Scraper
import events.digitools as digitools import events.digitools as digitools
venue, created = Organization.objects.get_or_create( venue, created = Organization.objects.get_or_create(
name="Green Room", name="Green Room",
city="Minneapolis", city="Minneapolis",
@@ -25,10 +24,8 @@ scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'm
event_type = "Mu" event_type = "Mu"
# Time Signatures tz_str = " -0600 UTC"
tz = pytz.timezone("US/Central") DATETIME_FORMAT = '%a %b %d %Y %I:%M %p %z %Z'
DATETIME_FORMAT = '%a %b %d %Y %I:%M %p'
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'
def get_events(ps, event_type): def get_events(ps, event_type):
contents = ps.xpath('.//*/div[@class="vp-event-card vp-venue-greenroom vp-col"]') contents = ps.xpath('.//*/div[@class="vp-event-card vp-venue-greenroom vp-col"]')
@@ -45,14 +42,13 @@ def get_events(ps, event_type):
event['calendars'] = [scraper.calendar] event['calendars'] = [scraper.calendar]
event['title'] = c.xpath('.//*/div[@class="vp-event-name"]/text()')[0] event['title'] = c.xpath('.//*/div[@class="vp-event-name"]/text()')[0]
event['datetime'] = date + " " + str(year) + " " + time event['datetime'] = date + " " + str(year) + " " + time
event['dateStamp'] = datetime.strptime(event['datetime'], DATETIME_FORMAT) event['dateStamp'] = datetime.strptime(event['datetime'] + tz_str, DATETIME_FORMAT)
event['link'] = venue.website + c.xpath('.//a[@class="vp-event-link"]/@href')[0] event['link'] = venue.website + c.xpath('.//a[@class="vp-event-link"]/@href')[0]
digitools.createBasicEvent(event, event_type, venue) digitools.createBasicEvent(event, event_type, venue)
scraper.items+=1 scraper.items+=1
except Exception as e: except Exception as e:
print(e) print(e)
ppr(event) ppr(event)
print("\n+++\n")
if len(sys.argv) >= 2: if len(sys.argv) >= 2:
arg1 = sys.argv[1] arg1 = sys.argv[1]

View File

@@ -26,8 +26,8 @@ venue, created = Organization.objects.get_or_create(
scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'msp') scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'msp')
ppr(scraper) ppr(scraper)
tz = pytz.timezone("US/Central") tz_str = " -0600 UTC"
DATETIME_FORMAT = '%a, %b %d, %Y @ %I:%M %p' DATETIME_FORMAT = '%a, %b %d, %Y @ %I:%M %p %z %Z'
# Set initial variables for City, etc # Set initial variables for City, etc
calendar_url = [ calendar_url = [
@@ -60,7 +60,7 @@ def get_listings(pse, events):
except: except:
e['price'] = "See Link" e['price'] = "See Link"
e['image'] = event.xpath('.//*/img/@data-src')[0] e['image'] = event.xpath('.//*/img/@data-src')[0]
e["date_time"] = datetime.strptime(e['datetime'], DATETIME_FORMAT) e["date_time"] = datetime.strptime(e['datetime'] + tz_str, DATETIME_FORMAT)
e['scraper'] = scraper e['scraper'] = scraper
e['calendar'] = [scraper.calendar] e['calendar'] = [scraper.calendar]
events.append(e) events.append(e)

View File

@@ -22,8 +22,8 @@ venue, created = Organization.objects.get_or_create(
scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'msp') scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'msp')
DATETIME_FORMAT = '%A, %B %d , %Y %I:%M %p' tz_str = " -0600 UTC"
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y' DATETIME_FORMAT = '%A, %B %d , %Y %I:%M %p %z %Z'
def get_events(ps, event_type): def get_events(ps, event_type):
contents = ps.xpath('.//*/div[@class="day has-event"]') contents = ps.xpath('.//*/div[@class="day has-event"]')
@@ -38,7 +38,7 @@ def get_events(ps, event_type):
event['title'] = c.xpath('.//*/h3/text()')[0] event['title'] = c.xpath('.//*/h3/text()')[0]
event['date'] = [month, day, year, c.xpath('.//*/p[@class="time"]/text()')[0]] event['date'] = [month, day, year, c.xpath('.//*/p[@class="time"]/text()')[0]]
event['date'] = " ".join(event['date']) event['date'] = " ".join(event['date'])
event['dateStamp'] =datetime.strptime(event['date'], DATETIME_FORMAT) event['dateStamp'] =datetime.strptime(event['date'] + tz_str, DATETIME_FORMAT)
event['link'] = "https://www.magersandquinn.com" + c.xpath('.//a[@class="event in-store"]/@href')[0] event['link'] = "https://www.magersandquinn.com" + c.xpath('.//a[@class="event in-store"]/@href')[0]
digitools.createBasicEvent(event, "Ed", venue) digitools.createBasicEvent(event, "Ed", venue)
scraper.items+=1 scraper.items+=1
@@ -63,7 +63,7 @@ for link in links:
ps = digitools.getSource(br, link) ps = digitools.getSource(br, link)
get_events(ps, "Ed") get_events(ps, "Ed")
sleep(3) sleep(3)
# ppr(events)
br.close() br.close()
digitools.updateScraper(scraper, item_count_start) digitools.updateScraper(scraper, item_count_start)

View File

@@ -26,8 +26,8 @@ except Exception as e:
scraper,item_count_start, virtcal = digitools.getScraper(venue, venue.website, 'msp') scraper,item_count_start, virtcal = digitools.getScraper(venue, venue.website, 'msp')
tz = pytz.timezone("US/Central") tz = pytz.timezone("US/Central")
tz_str = " -0600 UTC"
DATETIME_FORMAT = '%b %d, %Y %I:%M %p' DATETIME_FORMAT = '%b %d, %Y %I:%M %p %z %Z'
def get_events(ps, event_type): def get_events(ps, event_type):
contents = ps.xpath('.//*/div[@class="summary-content sqs-gallery-meta-container"]') contents = ps.xpath('.//*/div[@class="summary-content sqs-gallery-meta-container"]')
@@ -40,7 +40,7 @@ def get_events(ps, event_type):
event['title'] = c.xpath('.//*/a[@class="summary-title-link"]/text()')[0] event['title'] = c.xpath('.//*/a[@class="summary-title-link"]/text()')[0]
event['link'] = "https://theparkwaytheater.com" + c.xpath('.//*/a[@class="summary-title-link"]/@href')[0] event['link'] = "https://theparkwaytheater.com" + c.xpath('.//*/a[@class="summary-title-link"]/@href')[0]
event['date'] = c.xpath('.//div/div/time/text()')[0] + " " + c.xpath('.//*/span[@class="event-time-12hr"]/text()')[0].split("")[0].strip() event['date'] = c.xpath('.//div/div/time/text()')[0] + " " + c.xpath('.//*/span[@class="event-time-12hr"]/text()')[0].split("")[0].strip()
event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT) event['dateStamp'] = datetime.strptime(event['date'] + tz_str, DATETIME_FORMAT)
event['desc'] = c.xpath('.//*/p/text()')[0] event['desc'] = c.xpath('.//*/p/text()')[0]
event['img_link'] = i event['img_link'] = i
event['details'] = { event['details'] = {
@@ -54,8 +54,8 @@ def get_events(ps, event_type):
event_type = event_type, event_type = event_type,
show_title = event['title'], show_title = event['title'],
show_link = event['link'], show_link = event['link'],
show_date = datetime.strptime(event['date'], DATETIME_FORMAT), show_date = datetime.strptime(event['date'] + tz_str, DATETIME_FORMAT),
show_day = datetime.strptime(event['date'], DATETIME_FORMAT), show_day = datetime.strptime(event['date'] + tz_str, DATETIME_FORMAT),
more_details = event["details"], more_details = event["details"],
venue = venue venue = venue
) )
@@ -64,14 +64,14 @@ def get_events(ps, event_type):
except Exception as e: except Exception as e:
try: try:
event['date'] = c.xpath('.//div/div/time/text()')[0].split("")[0] + " " + c.xpath('.//*/span[@class="event-time-12hr"]/text()')[0].split("")[0].strip() event['date'] = c.xpath('.//div/div/time/text()')[0].split("")[0] + " " + c.xpath('.//*/span[@class="event-time-12hr"]/text()')[0].split("")[0].strip()
event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT) event['dateStamp'] = datetime.strptime(event['date'] + tz_str, DATETIME_FORMAT)
new_event, created = DSEvent.objects.update_or_create( new_event, created = DSEvent.objects.update_or_create(
scraper = scraper, scraper = scraper,
event_type = event_type, event_type = event_type,
show_title = event['title'], show_title = event['title'],
show_link = event['link'], show_link = event['link'],
show_date = datetime.strptime(event['date'], DATETIME_FORMAT), show_date = datetime.strptime(event['date'] + tz_str, DATETIME_FORMAT),
show_day = datetime.strptime(event['date'], DATETIME_FORMAT), show_day = datetime.strptime(event['date'] + tz_str, DATETIME_FORMAT),
more_details = event["details"], more_details = event["details"],
venue = venue venue = venue
) )
@@ -97,8 +97,6 @@ get_events(ps, "Mu")
calendar_url = "https://theparkwaytheater.com/movies" calendar_url = "https://theparkwaytheater.com/movies"
ps = digitools.getSource(br, calendar_url) ps = digitools.getSource(br, calendar_url)
get_events(ps, "Th") get_events(ps, "Th")
# ppr(events)
br.close() br.close()
digitools.updateScraper(scraper, item_count_start) digitools.updateScraper(scraper, item_count_start)

View File

@@ -13,7 +13,6 @@ import pytz
from events.models import Organization, Scraper, Event from events.models import Organization, Scraper, Event
import events.digitools as digitools import events.digitools as digitools
from selenium.webdriver.common.by import By from selenium.webdriver.common.by import By
from lxml import html from lxml import html
@@ -26,13 +25,9 @@ venue, created = Organization.objects.get_or_create(
scraper,item_count_start, virtcal = digitools.getScraper(venue, venue.website, 'msp') scraper,item_count_start, virtcal = digitools.getScraper(venue, venue.website, 'msp')
tz = pytz.timezone("US/Central") tz_str = " -0600 UTC"
DATETIME_FORMAT = '%a %B %d @ %I:%M %p %Y %z %Z'
DATETIME_FORMAT = '%a %B %d @ %I:%M %p %Y' DATETIME_FORMAT_2 = '%a %B %d, %Y @ %I:%M %p %z %Z'
DATETIME_FORMAT_2 = '%a %B %d, %Y @ %I:%M %p'
# DATETIME_FORMAT_2 = '%b %d %I:%M%p %Y'
# DATETIME_FORMAT_3 = '%b %d %Y'
# Set initial variables for City, etc
calendar_url = 'https://noboolpresents.com/venues/uptown-vfw/' calendar_url = 'https://noboolpresents.com/venues/uptown-vfw/'
current_year = str(datetime.now().year) current_year = str(datetime.now().year)
@@ -54,16 +49,14 @@ def getEvents(br):
dateTime = event.xpath('.//*/span[@class="tribe-event-date-start"]/text()')[0].replace("", "").strip() + " " + current_year dateTime = event.xpath('.//*/span[@class="tribe-event-date-start"]/text()')[0].replace("", "").strip() + " " + current_year
title = event.xpath('.//*/h2[@class="alt-font"]/a/text()')[0].replace("\n", "").replace("\t", "") title = event.xpath('.//*/h2[@class="alt-font"]/a/text()')[0].replace("\n", "").replace("\t", "")
link = event.xpath('.//*/h2[@class="alt-font"]/a/@href')[0] link = event.xpath('.//*/h2[@class="alt-font"]/a/@href')[0]
# deets["tickets"] = event.xpath('.//*/span[@class="tribe-events-c-small-cta__price"]/strong/text()')[0]
try: try:
new_event, created = Event.objects.update_or_create( new_event, created = Event.objects.update_or_create(
scraper = scraper, scraper = scraper,
event_type = 'Mu', event_type = 'Mu',
show_title = title, show_title = title,
show_link = link, show_link = link,
show_date = datetime.strptime(dateTime, DATETIME_FORMAT), show_date = datetime.strptime(dateTime + tz_str, DATETIME_FORMAT),
show_day = datetime.strptime(dateTime, DATETIME_FORMAT), show_day = datetime.strptime(dateTime + tz_str, DATETIME_FORMAT),
# more_details = deets["tickets"],
venue = venue venue = venue
) )
digitools.add_calendar(new_event, 'msp') digitools.add_calendar(new_event, 'msp')
@@ -75,9 +68,8 @@ def getEvents(br):
event_type = 'Mu', event_type = 'Mu',
show_title = title, show_title = title,
show_link = link, show_link = link,
show_date = datetime.strptime(dateTime[:-4].strip(), DATETIME_FORMAT_2), show_date = datetime.strptime(dateTime[:-4].strip() + tz_str, DATETIME_FORMAT_2),
show_day = datetime.strptime(dateTime[:-4].strip(), DATETIME_FORMAT_2), show_day = datetime.strptime(dateTime[:-4].strip() + tz_str, DATETIME_FORMAT_2),
# more_details = deets["tickets"],
venue = venue venue = venue
) )
digitools.add_calendar(new_event, 'msp') digitools.add_calendar(new_event, 'msp')

View File

@@ -13,9 +13,8 @@ import pytz
from events.models import Organization, Scraper from events.models import Organization, Scraper
import events.digitools as digitools import events.digitools as digitools
tz = pytz.timezone("US/Central") tz_str = " -0600 UTC"
DATETIME_FORMAT = '%b %d %I:%M %p %Y' DATETIME_FORMAT = '%b %d %I:%M %p %Y %z %Z'
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'
venue, created = Organization.objects.get_or_create( venue, created = Organization.objects.get_or_create(
name="Cabooze", name="Cabooze",
@@ -29,7 +28,6 @@ scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'm
def get_events(ps, event_type): def get_events(ps, event_type):
print("Getting events ...") print("Getting events ...")
contents = ps.xpath('.//*/div[@class="vp-event-row vp-widget-reset vp-venue-thecabooze"]') contents = ps.xpath('.//*/div[@class="vp-event-row vp-widget-reset vp-venue-thecabooze"]')
ppr(contents)
for c in contents: for c in contents:
try: try:
event = {} event = {}
@@ -45,16 +43,14 @@ def get_events(ps, event_type):
event['title'] = c.xpath('.//*/div[@class="vp-event-name"]/text()')[0] event['title'] = c.xpath('.//*/div[@class="vp-event-name"]/text()')[0]
event['date'] = [date, time, str(year)] event['date'] = [date, time, str(year)]
event['date'] = " ".join(event['date']) event['date'] = " ".join(event['date'])
event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT) event['dateStamp'] = datetime.strptime(event['date'] + tz_str, DATETIME_FORMAT)
event['link'] = "https://www.cabooze.com/" + c.xpath('.//a[@class="vp-event-link"]/@href')[0] event['link'] = "https://www.cabooze.com/" + c.xpath('.//a[@class="vp-event-link"]/@href')[0]
# print("Event Dict Created")
# ppr(event)
digitools.createBasicEvent(event, event_type, venue) digitools.createBasicEvent(event, event_type, venue)
scraper.items+=1 scraper.items+=1
except Exception as e: except Exception as e:
print(e) print(e)
ppr(event) ppr(event)
print("\n\n+++\n\n")
if len(sys.argv) >= 2: if len(sys.argv) >= 2:
arg1 = sys.argv[1] arg1 = sys.argv[1]

View File

@@ -22,17 +22,13 @@ venue, created = Organization.objects.get_or_create(
scraper,item_count_start,virtcal = digitools.getScraper(venue,venue.website, 'msp') scraper,item_count_start,virtcal = digitools.getScraper(venue,venue.website, 'msp')
DATETIME_FORMAT = '%A, %B %d, %Y %I:%M %p' tz_str = " -0600 UTC"
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y' DATETIME_FORMAT = '%A, %B %d, %Y %I:%M %p %z %Z'
DATETIME_FORMAT_3 = '%A, %B %d at %I:%M%p %Y'
DATETIME_FORMAT_4 = '%A, %B %d at %I%p %Y'
DATETIME_FORMAT_5 = '%A, %B %d @%I%p %Y'
def get_events(ps): def get_events(ps):
links = ps.xpath('.//*/div[@class="summary-title"]/a/@href') links = ps.xpath('.//*/div[@class="summary-title"]/a/@href')
print("Length of Links: ", len(links))
links = list(set(links)) links = list(set(links))
print("New Length of Links: ", len(links))
for l in links: for l in links:
if "cedar-news-blog" in l: if "cedar-news-blog" in l:
continue continue
@@ -54,7 +50,7 @@ def get_events(ps):
print(e) print(e)
print("failed event: ", event) print("failed event: ", event)
dateStamp = date + " " + time dateStamp = date + " " + time
event['dateStamp'] = datetime.strptime(dateStamp, DATETIME_FORMAT) event['dateStamp'] = datetime.strptime(dateStamp + tz_str, DATETIME_FORMAT)
digitools.createBasicEvent(event, "Mu", venue) digitools.createBasicEvent(event, "Mu", venue)
scraper.items+=1 scraper.items+=1

View File

@@ -35,9 +35,10 @@ venue, created = Organization.objects.get_or_create(
scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'msp') scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'msp')
DATETIME_FORMAT = '%b %d %I%p %Y' tz_str = " -0600 UTC"
DATETIME_FORMAT_2 = '%b %d %I:%M%p %Y' DATETIME_FORMAT = '%b %d %I%p %Y %z %Z'
DATETIME_FORMAT_3 = '%b %d %Y' DATETIME_FORMAT_2 = '%b %d %I:%M%p %Y %z %Z'
DATETIME_FORMAT_3 = '%b %d %Y %z %Z'
# Set initial variables for City, etc # Set initial variables for City, etc
calendar_url = 'https://331club.com/#calendar' calendar_url = 'https://331club.com/#calendar'
@@ -72,13 +73,13 @@ for d in dates:
event = {} event = {}
event["datetime"] = event_date + time + [current_year] event["datetime"] = event_date + time + [current_year]
try: try:
event["date_time"] = datetime.strptime(" ".join(event["datetime"]), DATETIME_FORMAT) event["date_time"] = datetime.strptime(" ".join(event["datetime"]) + tz_str, DATETIME_FORMAT)
except: except:
try: try:
event["date_time"] = datetime.strptime(" ".join(event["datetime"]), DATETIME_FORMAT_2) event["date_time"] = datetime.strptime(" ".join(event["datetime"]) + tz_str, DATETIME_FORMAT_2)
except: except:
try: try:
event["date_time"] = datetime.strptime(" ".join(event["datetime"]), DATETIME_FORMAT_3) event["date_time"] = datetime.strptime(" ".join(event["datetime"]) + tz_str, DATETIME_FORMAT_3)
except: except:
event["date_time"] = "Invalid" event["date_time"] = "Invalid"
event["bands"] = (", ").join(bands) event["bands"] = (", ").join(bands)

View File

@@ -34,18 +34,17 @@ def get_info(pse):
except Exception as e: except Exception as e:
print("details issue: ", e) print("details issue: ", e)
try: try:
event["date_time"] = datetime.strptime(" ".join(event["date"]) + " " + event["details"]["Doors Open"], DATETIME_FORMAT) event["date_time"] = datetime.strptime(" ".join(event["date"]) + " " + event["details"]["Doors Open"] + tz_str, DATETIME_FORMAT)
except Exception as e: except Exception as e:
print("Using alt date format 2: ", e) print("Using alt date format 2: ", e)
try: try:
event["date_time"] = datetime.strptime(" ".join(event["date"]) + " " + event["details"]["Doors Open"], DATETIME_FORMAT_2) event["date_time"] = datetime.strptime(" ".join(event["date"]) + " " + event["details"]["Doors Open"] + tz_str, DATETIME_FORMAT_2)
ppr(event) ppr(event)
except Exception as e: except Exception as e:
print("Using alt date format 3: ", e) print("Using alt date format 3: ", e)
print(event['date']) print(event['date'])
event["date_time"] = datetime.strptime(" ".join(event["date"]), DATETIME_FORMAT_3) event["date_time"] = datetime.strptime(" ".join(event["date"]) + tz_str, DATETIME_FORMAT_3)
print("The Event:") print("The Event:")
ppr(event)
return event return event
def get_date(pse, event): def get_date(pse, event):
@@ -78,10 +77,10 @@ venue, created = Organization.objects.get_or_create(
scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'msp') scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'msp')
tz = pytz.timezone("US/Central") tz = pytz.timezone("US/Central")
tz_str = " -0600 UTC"
DATETIME_FORMAT = '%b %d %Y %I%p' DATETIME_FORMAT = '%b %d %Y %I%p %z %Z'
DATETIME_FORMAT_2 = '%b %d %Y %I:%M%p' DATETIME_FORMAT_2 = '%b %d %Y %I:%M%p %z %Z'
DATETIME_FORMAT_3 = '%b %d %Y' DATETIME_FORMAT_3 = '%b %d %Y %z %Z'
# Set initial variables for City, etc # Set initial variables for City, etc
month = int(datetime.now().month) month = int(datetime.now().month)
@@ -112,8 +111,6 @@ else:
calendar_url_2 = 'https://first-avenue.com/shows/?start_date=' + str(year) + next_month_string calendar_url_2 = 'https://first-avenue.com/shows/?start_date=' + str(year) + next_month_string
# print("\n\n", calendar_url, calendar_url_2, "\n\n")
if len(sys.argv) >= 2: if len(sys.argv) >= 2:
arg1 = sys.argv[1] arg1 = sys.argv[1]
br = digitools.getBrowser(arg1) br = digitools.getBrowser(arg1)
@@ -128,6 +125,8 @@ if datetime.now().day < 8:
elif 7 < datetime.now().day < 15: elif 7 < datetime.now().day < 15:
ps = digitools.getSource(br, calendar_url) ps = digitools.getSource(br, calendar_url)
shows = ps.xpath('.//*/div[@class="show_name content flex-fill"]/div/div/h4/a/@href') shows = ps.xpath('.//*/div[@class="show_name content flex-fill"]/div/div/h4/a/@href')
ps = digitools.getSource(br, calendar_url_2)
shows = shows + ps.xpath('.//*/div[@class="show_name content flex-fill"]/div/div/h4/a/@href')[:15]
elif 14 < datetime.now().day < 21: elif 14 < datetime.now().day < 21:
ps = digitools.getSource(br, calendar_url) ps = digitools.getSource(br, calendar_url)
shows = ps.xpath('.//*/div[@class="show_name content flex-fill"]/div/div/h4/a/@href')[:95] shows = ps.xpath('.//*/div[@class="show_name content flex-fill"]/div/div/h4/a/@href')[:95]
@@ -176,4 +175,3 @@ br.close()
digitools.updateScraper(scraper, item_count_start) digitools.updateScraper(scraper, item_count_start)
# br.find_element_by_class_name('fc-btn_allCalendars-button').click()

View File

@@ -0,0 +1,79 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta
sys.path.append('/var/www/digisnaxx.ado/scrapers')
import dtss
dtss.getReady()
from time import sleep
from pprint import pprint as ppr
import pytz
from events.models import Organization, Scraper
import events.digitools as digitools
# Current year as a string, used when assembling parseable date strings.
current_year = str(datetime.now().year)
# Venue row for the Guthrie; created on first run, reused afterwards.
venue, created = Organization.objects.get_or_create(
    name="Guthrie Theater",
    city="Minneapolis",
    website="https://www.guthrietheater.org",
    is_venue = True
)
scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'msp')
# NOTE(review): event_type is "Mu" (music) but get_events below creates
# events with type 'Th' (theater) — confirm which is intended.
event_type = "Mu"
# Fixed CST offset appended to scraped date strings so strptime's %z/%Z work.
tz_str = "-0600 UTC"
DATETIME_FORMAT = '%b %d %Y %I:%M %p %z %Z'
def get_events(ps, event_type):
    """Scrape Guthrie Theater listings and create one event per performance.

    ps: parsed page source for the shows index page.
    event_type: accepted for interface consistency with sibling scrapers,
    but events are created with type 'Th'.
    Reads module-level globals: br, venue, scraper, tz_str, DATETIME_FORMAT.
    """
    contents = ps.xpath('.//*/div[@class="image-callouts-item-inner"]')
    events = []
    year = datetime.today().year
    # Pass 1: collect title/link for each show card on the index page.
    for c in contents:
        try:
            event = {}
            event['title'] = c.xpath('.//figcaption/p/text()')[0]
            event['link'] = venue.website + c.xpath('.//figcaption/a/@href')[0]
            event['scraper'] = scraper
            # NOTE(review): sibling scrapers wrap this in a list
            # ([scraper.calendar]) — confirm which form createBasicEvent expects.
            event['calendars'] = scraper.calendar
            events.append(event)
        except Exception as exc:
            print(exc)
    # Pass 2: visit each show page and create one event per performance.
    # NOTE(review): the last three cards are skipped ([:-3]) — presumably
    # non-show callouts at the end of the page; confirm against the site.
    for ev in events[:-3]:
        try:
            # Distinct names (ev, show_ps) so we no longer shadow the loop
            # variable with `except ... as e` (which also deletes the name on
            # handler exit in Python 3) or clobber the `ps` argument.
            show_ps = digitools.getSource(br, ev['link'])
            nc = show_ps.xpath('.//div[@class="modal-row-inner buytix"]')
            for n in nc:
                date = n.xpath('.//div[@class="buytix-info"]/h4/text()')[0].split(',')[1].strip()
                time = n.xpath('.//div[@class="buytix-time"]/p/text()')[0].strip()
                ev['datetime'] = "{0} {1} {2} {3}".format(date, year, time, tz_str)
                ev['dateStamp'] = datetime.strptime(ev['datetime'], DATETIME_FORMAT)
                digitools.createBasicEvent(ev, 'Th', venue)
                scraper.items += 1
        except Exception as exc:
            print("Error: ", exc)
# --- entry point: expects the run environment name as argv[1] ---
if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    quit()
# Shows index page; individual show pages are visited inside get_events.
shows = "/shows-and-tickets/"
ps = digitools.getSource(br, venue.website+shows)
get_events(ps, event_type)
sleep(3)
br.close()
digitools.updateScraper(scraper, item_count_start)

View File

@@ -14,9 +14,8 @@ from events.models import Organization, Scraper
import events.digitools as digitools import events.digitools as digitools
count = 0 count = 0
tz = pytz.timezone("US/Central") tz_str = " -0600 UTC"
DATETIME_FORMAT = '%a, %b %d %Y %I%p' DATETIME_FORMAT = '%a, %b %d %Y %I%p %z %Z'
DATETIME_FORMAT_2 = '%a, %b %d %Y %I:%M %p SHOW'
venue, created = Organization.objects.get_or_create( venue, created = Organization.objects.get_or_create(
name="Icehouse", name="Icehouse",
@@ -38,19 +37,16 @@ def get_events(ps, event_type):
event['link'] = venue.website + c.xpath('.//*/a[@class="show-link"]/@href')[0] event['link'] = venue.website + c.xpath('.//*/a[@class="show-link"]/@href')[0]
event['date'] = c.xpath('.//*/h4[@class="day-of-week"]/text()')[0] event['date'] = c.xpath('.//*/h4[@class="day-of-week"]/text()')[0]
month = event['date'].split(' ')[1].strip() month = event['date'].split(' ')[1].strip()
print("MONTH: ", month)
year = int(datetime.today().year) year = int(datetime.today().year)
if month in ['Jan', 'Feb', 'Mar']: if month in ['Jan', 'Feb', 'Mar']:
year = year + 1 year = year + 1
time = c.xpath('.//p/span/text()')[0][:4].strip() time = c.xpath('.//p/span/text()')[0][:4].strip()
if time[-1:] == 'm': if time[-1:] == 'm':
event['dateStamp'] = event['date'] + ' ' + str(year) + ' ' + time event['dateStamp'] = event['date'] + ' ' + str(year) + ' ' + time
event['dateStamp'] =datetime.strptime(event['dateStamp'], DATETIME_FORMAT) event['dateStamp'] =datetime.strptime(event['dateStamp'] + tz_str, DATETIME_FORMAT)
digitools.createBasicEvent(event, event_type, venue) digitools.createBasicEvent(event, event_type, venue)
else: else:
ppr(event) print("Failed ?")
print("MONTH: ", month)
except Exception as e: except Exception as e:
print("What?",e,"\n\n+++") print("What?",e,"\n\n+++")
@@ -65,6 +61,5 @@ ps = digitools.getSource(br, venue.website)
br.execute_script("window.scrollTo(0, window.scrollY + 5000)") br.execute_script("window.scrollTo(0, window.scrollY + 5000)")
get_events(ps, "Mu") get_events(ps, "Mu")
# ppr(events)
br.close() br.close()
digitools.updateScraper(scraper, item_count_start) digitools.updateScraper(scraper, item_count_start)

View File

@@ -13,7 +13,6 @@ import pytz
from events.models import Organization, Scraper from events.models import Organization, Scraper
import events.digitools as digitools import events.digitools as digitools
current_year = str(datetime.now().year) current_year = str(datetime.now().year)
venue, created = Organization.objects.get_or_create( venue, created = Organization.objects.get_or_create(
@@ -27,39 +26,34 @@ scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'm
event_type = "Mu" event_type = "Mu"
# Time Signatures tz_str = " -0600 UTC"
tz = pytz.timezone("US/Central") DATETIME_FORMAT = '%b. %d %Y %I:%M%p %z %Z'
DATETIME_FORMAT = '%b. %d %Y %I:%M%p'
DATETIME_FORMAT_night = '%b. %d %Y %I:%M %p'
DATETIME_FORMAT_2 = '%b. %d %Y %I:%Mam'
def get_events(ps, event_type): def get_events(ps, event_type):
contents = ps.xpath('.//*/div[@class="sse-row sse-clearfix"]') contents = ps.xpath('.//*/div[@class="sse-row sse-clearfix"]')
for c in contents: for c in contents:
year = datetime.today().year year = datetime.today().year
try: try:
date = c.xpath('.//h1[@class="sse-size-64"]/text()')[0] date = c.xpath('.//h1[@class="sse-size-64"]/text()')[0]
month = date.split(".")[0] month = date.split(".")[0]
ppr(date) # ppr(date)
if month in ['JAN', 'FEB', 'MAR']: # if month in ['JAN', 'FEB', 'MAR']:
year = int(datetime.today().year) + 1 # year = int(datetime.today().year) + 1
event = {} event = {}
event['scraper'] = scraper event['scraper'] = scraper
event['calendars'] = scraper.calendar event['calendars'] = scraper.calendar
event['link'] = venue.website event['link'] = venue.website
event['title'] = c.xpath('.//p/span/b/text()')[0] event['title'] = c.xpath('.//p/span/b/text()')[0]
event['deets'] = c.xpath('.//p/span/text()')[0] event['deets'] = c.xpath('.//p/span/text()')[0]
event['title'] = event['title'] + ' ' + event['deets'] event['title'] = event['title'] + ' ' + event['deets']
paras = c.xpath('.//p/text()') paras = c.xpath('.//p/text()')
times = paras[1].split(" ")[1] times = paras[1].split(" ")[1]
event['datetime'] = "{0} {1} {2}".format(date, year, times) event['datetime'] = "{0} {1} {2}".format(date, year, times)
event['dateStamp'] =datetime.strptime(event['datetime'], DATETIME_FORMAT) event['dateStamp'] =datetime.strptime(event['datetime'] + tz_str, DATETIME_FORMAT)
ppr(event) print("Print Events: ")
digitools.createBasicEvent(event, event_type, venue) digitools.createBasicEvent(event, event_type, venue)
except Exception as e: except Exception as e:
# print(e) print("Error: ", e)
pass pass
if len(sys.argv) >= 2: if len(sys.argv) >= 2:

View File

@@ -21,9 +21,8 @@ venue, created = Organization.objects.get_or_create(
) )
scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'msp') scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'msp')
tz_str = " -0600 UTC"
DATETIME_FORMAT = '%B %d %Y %I:%M%p' DATETIME_FORMAT = '%B %d %Y %I:%M%p %z %Z'
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'
def get_events(ps): def get_events(ps):
contents = ps.xpath('.//*/ul[@class="events-list"]/li') contents = ps.xpath('.//*/ul[@class="events-list"]/li')
@@ -38,12 +37,11 @@ def get_events(ps):
event['title'] = c.xpath('.//div/h4/a/text()')[0] event['title'] = c.xpath('.//div/h4/a/text()')[0]
event['date'] = [month, day, str(year), c.xpath('.//div[@class="event-info"]/p/text()')[0].split(" ")[0]] event['date'] = [month, day, str(year), c.xpath('.//div[@class="event-info"]/p/text()')[0].split(" ")[0]]
event['date'] = " ".join(event['date']) event['date'] = " ".join(event['date'])
event['dateStamp'] =datetime.strptime(event['date'], DATETIME_FORMAT) event['dateStamp'] =datetime.strptime(event['date'] +tz_str, DATETIME_FORMAT)
event['link'] = c.xpath('.//div[@class="event-info"]/h4/a/@href')[0] event['link'] = c.xpath('.//div[@class="event-info"]/h4/a/@href')[0]
if " presents" in event['title']: if " presents" in event['title']:
event['title'] = event['title'].split("presents")[1][1:].strip() event['title'] = event['title'].split("presents")[1][1:].strip()
if event['title'].startswith('.'): if event['title'].startswith('.'):
print("BLAHH\n")
event['title'] = event['title'][1:].strip() event['title'] = event['title'][1:].strip()
digitools.createBasicEvent(event, "Mu", venue) digitools.createBasicEvent(event, "Mu", venue)
scraper.items+=1 scraper.items+=1

View File

@@ -22,12 +22,8 @@ venue, created = Organization.objects.get_or_create(
scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'msp') scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'msp')
event_type = "" tz_str = " -0600 UTC"
DATETIME_FORMAT = '%B %d @ %I:%M %p %Y %z %Z'
# Time Signatures
tz = pytz.timezone("US/Central")
DATETIME_FORMAT_2 = '%b %d %Y %I:%M %p'
DATETIME_FORMAT = '%B %d @ %I:%M %p %Y'
def get_events(ps, event_type): def get_events(ps, event_type):
contents = ps.xpath('.//*/article') contents = ps.xpath('.//*/article')
@@ -41,15 +37,13 @@ def get_events(ps, event_type):
year = int(year) + 1 year = int(year) + 1
event['scraper'] = scraper event['scraper'] = scraper
event['calendars'] = [scraper.calendar] event['calendars'] = [scraper.calendar]
event['title'] = c.xpath('.//*/h3/a/text()')[0].replace("\n", "").replace("\t", "") event['title'] = c.xpath('.//*/h4/a/text()')[0].replace("\n", "").replace("\t", "")
event['date'] = " ".join([ dateTime, str(year)]) event['date'] = " ".join([ dateTime, str(year)])
event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT) event['dateStamp'] = datetime.strptime(event['date'] + tz_str, DATETIME_FORMAT)
event['link'] = c.xpath('.//*/h3/a/@href')[0] event['link'] = c.xpath('.//*/h4/a/@href')[0]
digitools.createBasicEvent(event, event_type, venue) digitools.createBasicEvent(event, event_type, venue)
except Exception as e: except Exception as e:
print(e) print(e)
ppr(event)
print("\n\n+++\n\n")
if len(sys.argv) >= 2: if len(sys.argv) >= 2:
arg1 = sys.argv[1] arg1 = sys.argv[1]

View File

@@ -11,7 +11,6 @@ from pprint import pprint as ppr
import pytz import pytz
from events.models import Organization, Scraper from events.models import Organization, Scraper
import events.digitools as digitools import events.digitools as digitools
venue, created = Organization.objects.get_or_create( venue, created = Organization.objects.get_or_create(
@@ -23,7 +22,6 @@ venue, created = Organization.objects.get_or_create(
scraper,item_count_start, virtcal = digitools.getScraper(venue, venue.website, 'msp') scraper,item_count_start, virtcal = digitools.getScraper(venue, venue.website, 'msp')
# Time Signatures
tz = pytz.timezone("US/Central") tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%A, %B %d, %Y %I:%M %p' DATETIME_FORMAT = '%A, %B %d, %Y %I:%M %p'

View File

@@ -0,0 +1,75 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta
sys.path.append('/var/www/digisnaxx.ado/scrapers')
import dtss
dtss.getReady()
from time import sleep
from pprint import pprint as ppr
import pytz
from events.models import Organization, Scraper, Calendar, Event
import events.digitools as digitools
# Fixed PST offset appended to scraped date strings so strptime's %z/%Z work.
tz_str = "-0800 UTC"
# Minutes-bearing format; _2 is the hour-only fallback ("8PM" vs "8:30 PM").
DATETIME_FORMAT = '%A, %B %d, %Y %I:%M %p %z %Z'
DATETIME_FORMAT_2 = '%A, %B %d, %Y %I%p %z %Z'
# Venue row for Mississippi Studios; created on first run, reused afterwards.
venue, created = Organization.objects.get_or_create(
    name="Mississippi Studios",
    city="Portland",
    website="https://mississippistudios.com/full-view/",
    is_venue = True
)
scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'pdx')
# Reset the per-run item counter before scraping.
scraper.items = 0
scraper.save()
def get_events(ps, event_type):
    """Create a Music event for each show card on a Mississippi Studios page.

    ps: parsed page source for one calendar page (the function was previously
    declared with a parameter named `contents` that it immediately shadowed,
    silently reading the global `ps` instead — same object, now explicit).
    event_type: accepted for interface consistency; events are created as "Mu".
    Reads module-level globals: scraper, venue, tz_str, DATETIME_FORMAT,
    DATETIME_FORMAT_2.
    """
    contents = ps.xpath('.//*/div[@class="event"]/div/div[@class="event__inner"]')
    i = 0
    for c in contents:
        i += 1
        try:
            event = {}
            date = c.xpath('.//*/h5/text()')[0].replace("\n", "").replace("\t", "")
            # Doors times live in a page-level node list; entry i is paired with
            # card i. NOTE(review): i starts at 1, so entry 0 of that list is
            # never used — confirm the first entry is not a real event's time.
            time = c.xpath('//div/div/div[4]/p[1]/span/text()')[i].split("/")[0].replace("Doors: ", "").strip()
            # (The original assigned event['date'] twice — duplicate removed.)
            event['date'] = (' ').join([date, time, tz_str])
            event['scraper'] = scraper
            event['calendars'] = [scraper.calendar]
            event['title'] = c.xpath('.//*/h2/a/text()')[0]
            # Minutes-bearing format first, hour-only format as fallback.
            try:
                event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
            except ValueError:
                event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT_2)
            event['link'] = c.xpath('.//*/h2/a/@href')[0]
            digitools.createBasicEvent(event, "Mu", venue)
            scraper.items+=1
        except Exception as e:
            print("\nError: ", e)
# --- entry point: expects the run environment name as argv[1] ---
if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    # No run_env given: nothing has been opened yet, so just report and exit.
    # (Previously this branch called br.close(), which raised NameError
    # because `br` is only bound in the success branch above.)
    print("No run_env")
    quit()
# Page 1 of the calendar.
ps = digitools.getSource(br, venue.website)
get_events(ps, "Mu")
sleep(3)
# Page 2: the site paginates by appending the page number to the URL.
link = venue.website + "2"
print(link)
ps = digitools.getSource(br, link)
get_events(ps, "Mu")
sleep(3)
digitools.updateScraper(scraper, item_count_start)
br.close()

View File

@@ -0,0 +1,75 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta
sys.path.append('/var/www/digisnaxx.ado/scrapers')
import dtss
dtss.getReady()
from time import sleep
from pprint import pprint as ppr
import pytz
from events.models import Organization, Scraper, Calendar, Event
import events.digitools as digitools
# Fixed PST offset appended to scraped date strings so strptime's %z/%Z work.
tz_str = "-0800 UTC"
# Minutes-bearing format; _2 is the hour-only fallback ("8PM" vs "8:30 PM").
DATETIME_FORMAT = '%a, %B %d, %Y %I:%M %p %z %Z'
DATETIME_FORMAT_2 = '%a, %B %d, %Y %I%p %z %Z'
# Venue row for Revolution Hall; created on first run, reused afterwards.
venue, created = Organization.objects.get_or_create(
    name="Revolution Hall",
    city="Portland",
    website="https://www.revolutionhall.com/",
    is_venue = True
)
scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'pdx')
# Reset the per-run item counter before scraping.
scraper.items = 0
scraper.save()
def get_events(contents, event_type, bar):
    """Create a Music event for each Revolution Hall event card.

    contents: list of event-card elements (one per show).
    event_type: accepted for interface consistency; events are created as "Mu".
    bar: "Show Bar" tags the title so Show Bar gigs are distinguishable from
    main-hall shows scraped from the same page.
    Reads module-level globals: scraper, venue, tz_str, DATETIME_FORMAT,
    DATETIME_FORMAT_2.
    """
    import re  # local import: only needed here for ordinal-suffix stripping
    for c in contents:
        try:
            event = {}
            raw_date = c.xpath('.//*/span[@class="event-date--full"]/text()')[0]
            # Strip ordinal suffixes only when they follow a digit ("21st" -> "21").
            # The previous blanket str.replace() calls corrupted month names —
            # e.g. "August".replace("st", "") -> "Augu" — breaking %B parsing.
            date = re.sub(r'(?<=\d)(st|nd|rd|th)', '', raw_date).strip()
            time = c.xpath('.//*/span[@class="event-doors-showtime"]/text()')[0].split("/")[0].replace("Doors: ", "").strip()
            if not "Show" in time:
                event['date'] = (' ').join([date, time, tz_str])
            else:
                # Card lists a showtime rather than a doors time.
                time = time.replace("Show: ", "").strip()
                event['date'] = (' ').join([date, time, tz_str])
            event['scraper'] = scraper
            event['calendars'] = [scraper.calendar]
            event['title'] = c.xpath('.//*/div[@class="event__content"]/h3/a/text()')[0]
            if bar == "Show Bar":
                # Reuse the title already extracted instead of re-querying.
                event['title'] = event['title'] + " (Show Bar)"
            # Minutes-bearing format first, hour-only format as fallback.
            try:
                event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
            except ValueError:
                event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT_2)
            event['link'] = c.xpath('.//*/div[@class="event__content"]/h3/a/@href')[0]
            digitools.createBasicEvent(event, "Mu", venue)
            scraper.items+=1
            print("\nSuccess\n")
        except Exception as e:
            print("\nError: ", e)
# --- entry point: expects the run environment name as argv[1] ---
if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    # No run_env given: nothing has been opened yet, so just report and exit.
    # (Previously this branch called br.close(), which raised NameError
    # because `br` is only bound in the success branch above.)
    print("No run_env")
    quit()
ps = digitools.getSource(br, venue.website)
# Main hall and Show Bar shows live in separate wrappers on the same page.
contents = ps.xpath('.//*/div[@class="event-wrapper revolution-hall"]')
get_events(contents, "Mu", "Rev Hall")
contents = ps.xpath('.//*/div[@class="event-wrapper show-bar"]')
get_events(contents, "Mu", "Show Bar")
sleep(3)
digitools.updateScraper(scraper, item_count_start)
br.close()

View File

@@ -0,0 +1,68 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta
sys.path.append('/var/www/digisnaxx.ado/scrapers')
import dtss
dtss.getReady()
from time import sleep
from pprint import pprint as ppr
import pytz
from events.models import Organization, Scraper, Calendar, Event
import events.digitools as digitools
# Fixed PST offset appended to scraped date strings so strptime's %z/%Z work.
tz_str = "-0800 UTC"
# Minutes-bearing format; _2 is the hour-only fallback ("8 PM" vs "8:30PM").
DATETIME_FORMAT = '%a, %b %d %Y %I:%M%p %z %Z'
DATETIME_FORMAT_2 = '%a, %b %d %Y %I %p %z %Z'
# Venue row for the Roseland; created on first run, reused afterwards.
venue, created = Organization.objects.get_or_create(
    name="Roseland Theater",
    city="Portland",
    website="https://roselandpdx.com/",
    is_venue = True
)
scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'pdx')
# Reset the per-run item counter before scraping.
scraper.items = 0
scraper.save()
def get_events(ps, event_type):
    """Create a Music event for each show on the Roseland listing page.

    ps: parsed page source. event_type is accepted for interface consistency;
    every event is created with type "Mu". Reads module-level globals:
    scraper, venue, tz_str, DATETIME_FORMAT, DATETIME_FORMAT_2.
    """
    row_sel = './/*/div[@class="col-12 eventWrapper rhpSingleEvent py-4 px-0 rhp-event__single-event--list"]'
    for row in ps.xpath(row_sel):
        try:
            event = {}
            # Date text like "Fri, Jan 24"; scrub layout whitespace.
            when = row.xpath('.//*/div[@id="eventDate"]/text()')[0]
            when = when.replace("\n", "").replace("\t", "")
            # First three whitespace-separated tokens hold the doors time.
            doors = row.xpath('.//*/span[@class="font0by75 fontWeight500 lineHeight15 rhp-event__time-text--list"]/text()')[0].split(" ")[:3]
            doors = " ".join(doors).replace("Doors: ", "").replace("\n", "").replace("\t", "")
            event['date'] = " ".join([when, str(datetime.now().year), doors, tz_str])
            event['scraper'] = scraper
            event['calendars'] = [scraper.calendar]
            event['title'] = row.xpath('.//*/a[@id="eventTitle"]/h2/text()')[0].replace("\n", "").replace("\t", "")
            # Minutes-bearing format first, hour-only format as fallback.
            try:
                event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
            except:
                event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT_2)
            event['link'] = row.xpath('.//*/a[@id="eventTitle"]/@href')[0]
            digitools.createBasicEvent(event, "Mu", venue)
            scraper.items += 1
        except Exception as err:
            print("\nError: ", err)
# --- entry point: expects the run environment name as argv[1] ---
if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    # No run_env given: nothing has been opened yet, so just report and exit.
    # (Previously this branch called br.close(), which raised NameError
    # because `br` is only bound in the success branch above.)
    print("No run_env")
    quit()
ps = digitools.getSource(br, venue.website)
get_events(ps, "Mu")
sleep(3)
digitools.updateScraper(scraper, item_count_start)
br.close()

View File

@@ -0,0 +1,72 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta
sys.path.append('/var/www/digisnaxx.ado/scrapers')
import dtss
dtss.getReady()
from time import sleep
from pprint import pprint as ppr
import pytz
from events.models import Organization, Scraper, Calendar, Event
import events.digitools as digitools
# Fixed PST offset appended to scraped date strings so strptime's %z/%Z work.
tz_str = "-0800 UTC"
# Minutes-bearing format; _2 is the hour-only fallback ("8 PM" vs "8:30PM").
DATETIME_FORMAT = '%a, %b %d %Y %I:%M%p %z %Z'
DATETIME_FORMAT_2 = '%a, %b %d %Y %I %p %z %Z'
# NOTE(review): this file appears to duplicate the Roseland scraper —
# same venue name, city, and website; confirm it is intentionally separate.
venue, created = Organization.objects.get_or_create(
    name="Roseland Theater",
    city="Portland",
    website="https://roselandpdx.com/",
    is_venue = True
)
scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'pdx')
# Reset the per-run item counter before scraping.
scraper.items = 0
scraper.save()
def get_events(ps, event_type):
    """Create a Music event for each show on the Roseland listing page.

    ps: parsed page source. event_type is accepted for interface consistency;
    every event is created with type "Mu". Reads module-level globals:
    scraper, venue, tz_str, DATETIME_FORMAT, DATETIME_FORMAT_2.
    """
    card_sel = './/*/div[@class="col-12 eventWrapper rhpSingleEvent py-4 px-0 rhp-event__single-event--list"]'
    for card in ps.xpath(card_sel):
        try:
            event = {}
            # Date text like "Fri, Jan 24"; scrub layout whitespace.
            day_text = card.xpath('.//*/div[@id="eventDate"]/text()')[0]
            day_text = day_text.replace("\n", "").replace("\t", "")
            # First three whitespace-separated tokens hold the doors time.
            time_parts = card.xpath('.//*/span[@class="font0by75 fontWeight500 lineHeight15 rhp-event__time-text--list"]/text()')[0].split(" ")[:3]
            time_text = " ".join(time_parts).replace("Doors: ", "").replace("\n", "").replace("\t", "")
            event['date'] = " ".join([day_text, str(datetime.now().year), time_text, tz_str])
            event['scraper'] = scraper
            event['calendars'] = [scraper.calendar]
            event['title'] = card.xpath('.//*/a[@id="eventTitle"]/h2/text()')[0].replace("\n", "").replace("\t", "")
            # Minutes-bearing format first, hour-only format as fallback.
            try:
                event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
            except:
                event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT_2)
            event['link'] = card.xpath('.//*/a[@id="eventTitle"]/@href')[0]
            digitools.createBasicEvent(event, "Mu", venue)
            scraper.items += 1
            print("\nSuccess\n")
        except Exception as err:
            print("\nError: ", err)
# --- entry point: expects the run environment name as argv[1] ---
if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    # No run_env given: nothing has been opened yet, so just report and exit.
    # (Previously this branch called br.close(), which raised NameError
    # because `br` is only bound in the success branch above.)
    print("No run_env")
    quit()
ps = digitools.getSource(br, venue.website)
get_events(ps, "Mu")
sleep(3)
digitools.updateScraper(scraper, item_count_start)
br.close()

View File

@@ -0,0 +1,67 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta
sys.path.append('/var/www/digisnaxx.ado/scrapers')
import dtss
dtss.getReady()
from time import sleep
from pprint import pprint as ppr
import pytz
from events.models import Organization, Scraper, Calendar, Event
import events.digitools as digitools
# Fixed PST offset appended to scraped date strings so strptime's %z/%Z work.
tz_str = "-0800 UTC"
DATETIME_FORMAT = '%b %d, %Y %I:%M %p %z %Z'
DATETIME_FORMAT_2 = '%a, %b %d %Y %I %p %z %Z'
# NOTE(review): volcanictheatre.com is the Volcanic Theatre Pub in Bend, OR —
# confirm the Portland city value and the 'pdx' calendar below are intended.
venue, created = Organization.objects.get_or_create(
    name="Volcanic Theater",
    city="Portland",
    website="https://www.volcanictheatre.com/",
    is_venue = True
)
scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'pdx')
# Reset the per-run item counter before scraping.
scraper.items = 0
scraper.save()
def get_events(ps, event_type):
    """Create a Music event for each card on the Volcanic Theatre page.

    ps: parsed page source. event_type is accepted for interface consistency;
    every event is created with type "Mu". Reads module-level globals:
    scraper, venue, tz_str, DATETIME_FORMAT, DATETIME_FORMAT_2.
    """
    contents = ps.xpath('.//*/div[@class="event-card"]')
    for c in contents:
        try:
            event = {}
            date = c.xpath('.//*/div[@class="date"]/text()')[0]
            time = c.xpath('.//*/div[@class="doors-open"]/div/text()')[1]
            event['date'] = (' ').join([date, time, tz_str])
            event['scraper'] = scraper
            event['calendars'] = [scraper.calendar]
            # Index [0]: xpath() returns a list; every sibling scraper stores
            # the first match, and createBasicEvent expects string fields —
            # previously the whole list was stored for title and link.
            event['title'] = c.xpath('.//div[@class="headline"]/text()')[0]
            event['link'] = c.xpath('.//a/@href')[0]
            # Primary format first, then the hour-only fallback; on failure
            # skip the card instead of handing createBasicEvent an event with
            # no dateStamp (previously a guaranteed downstream error).
            try:
                event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
            except ValueError:
                try:
                    event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT_2)
                except ValueError as exc:
                    print(exc)
                    continue
            digitools.createBasicEvent(event, "Mu", venue)
            scraper.items+=1
        except Exception as e:
            print("\nError: ", e)
# --- entry point: expects the run environment name as argv[1] ---
if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    # No run_env given: nothing has been opened yet, so just report and exit.
    # (Previously this branch called br.close(), which raised NameError
    # because `br` is only bound in the success branch above.)
    print("No run_env")
    quit()
ps = digitools.getSource(br, venue.website)
get_events(ps, "Mu")
sleep(3)
digitools.updateScraper(scraper, item_count_start)
br.close()

View File

@@ -1,7 +1,5 @@
#!/bin/bash #!/bin/bash
BASEDIR=/var/www/digisnaxx.ado/api.digisnaxx
EVENTSDIR=/var/www/digisnaxx.ado/api.digisnaxx/events
SCRIPTDIR=/var/www/digisnaxx.ado/api.digisnaxx/events/scrapers/scripts SCRIPTDIR=/var/www/digisnaxx.ado/api.digisnaxx/events/scrapers/scripts
GOVTDIR=/var/www/digisnaxx.ado/api.digisnaxx/events/scrapers/Working/govt GOVTDIR=/var/www/digisnaxx.ado/api.digisnaxx/events/scrapers/Working/govt

View File

@@ -1,7 +1,5 @@
#!/bin/bash #!/bin/bash
BASEDIR=/var/www/digisnaxx.ado/api.digisnaxx
DJANGODIR=/var/www/digisnaxx.ado/api.digisnaxx/events
SCRIPTDIR=/var/www/digisnaxx.ado/api.digisnaxx/events/scrapers/scripts SCRIPTDIR=/var/www/digisnaxx.ado/api.digisnaxx/events/scrapers/scripts
ICALDIR=/var/www/digisnaxx.ado/api.digisnaxx/events/scrapers/Working/iCal ICALDIR=/var/www/digisnaxx.ado/api.digisnaxx/events/scrapers/Working/iCal

View File

@@ -1,10 +1,9 @@
import re, os, sys import re, os, sys
from datetime import datetime, timedelta from datetime import datetime, timedelta
import django sys.path.append('/var/www/digisnaxx.ado/scrapers')
sys.path.append('../../../') import dtss
os.environ['DJANGO_SETTINGS_MODULE'] = 'config.django.local' dtss.getReady()
django.setup()
from events.models import Event,Organization, Promo, Calendar from events.models import Event,Organization, Promo, Calendar