more scrapers

This commit is contained in:
2026-01-24 19:01:47 -05:00
parent 7013d8327a
commit 3c4a41ae2c
58 changed files with 1614 additions and 2988 deletions

66
Working/govt/co.denver.py Normal file
View File

@@ -0,0 +1,66 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta
sys.path.append('/var/www/digisnaxx.ado/scrapers')
import dtss
dtss.getReady()
from time import sleep
from pprint import pprint as ppr
import pytz
from events.models import Organization, Scraper
import events.digitools as digitools
venue, created = Organization.objects.get_or_create(
name="Denver City Council",
city="Denver",
website="https://denver.legistar.com/Calendar.aspx",
is_venue=True
)
scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'dia')
event_type = "Gv"
# Time Signatures
tz_str = " -0600 UTC"
DATETIME_FORMAT = '%m/%d/%Y %I:%M %p %z %Z'
DATETIME_FORMAT_2 = '%B %d, %Y'
def get_events(ps, event_type):
    """Scrape Legistar calendar rows from *ps* and store them as *event_type* events.

    Reads module globals: scraper, venue, tz_str, DATETIME_FORMAT, digitools.
    """
    print("Getting events ...")
    rows = ps.xpath('.//*/tr[@class="rgRow"]')
    for row in rows:
        event = {}
        try:
            # Fields shared by every row.
            event['scraper'] = scraper
            event['link'] = venue.website
            event['calendars'] = [scraper.calendar]
            # Per-row fields pulled out of the Legistar table markup.
            event['title'] = row.xpath('.//*/a/text()')[0]
            event['time'] = row.xpath('.//*/span/text()')[2]
            event['date'] = row.xpath('.//td[@class="rgSorted"]/text()')[0]
            stamp_text = "{} {}{}".format(event['date'], event['time'], tz_str)
            event['dateStamp'] = datetime.strptime(stamp_text, DATETIME_FORMAT)
            ppr(event)
            digitools.createBasicEvent(event, event_type, venue)
            scraper.items += 1
        except Exception as e:
            # A malformed row is logged and skipped; scraping continues.
            print(e)
            ppr(event)
            print("\n\n+++\n\n")
# Entry point: a run-environment argument (e.g. browser profile name) is
# required to obtain a browser; otherwise bail out.
if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    quit()
# Fetch the Legistar calendar page, scrape it as "Gv" (government) events,
# then persist updated scraper statistics and release the browser.
ps = digitools.getSource(br, venue.website)
sleep(1)
get_events(ps, "Gv")
sleep(2)
digitools.updateScraper(scraper, item_count_start)
br.close()

View File

@@ -20,13 +20,13 @@ venue, created = Organization.objects.get_or_create(
is_venue=True
)
scraper,item_count_start,virtcal = digitools.getScraper(venue, 'bos')
scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'bos')
event_type = "Gv"
# Time Signatures
tz = pytz.timezone("US/Eastern")
DATETIME_FORMAT = '%B %d, %Y %I:%M%p'
tz_str = " -0500 UTC"
DATETIME_FORMAT = '%B %d, %Y %I:%M%p %z %Z'
def get_events(ps, event_type):
print("Getting events ...")
@@ -40,7 +40,7 @@ def get_events(ps, event_type):
event['title'] = c.xpath('.//*/div[@class="n-li-t"]/a/text()')[0]
event['link'] = venue.website + c.xpath('.//*/div[@class="n-li-t"]/a/@href')[0]
event['date'] = c.xpath('.//*/li[@class="dl-i"]/span[@class="dl-d"]/text()')[0].replace('\n', '').split("-")[0].strip()
event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
event['dateStamp'] = datetime.strptime(event['date'] + tz_str, DATETIME_FORMAT)
digitools.createBasicEvent(event, event_type, venue)
ppr(event)
scraper.items+=1

View File

@@ -35,8 +35,8 @@ except Exception as e:
print("Scraper: ", scraper)
tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%A, %B %d, %Y %I:%M %p'
tz_str = " -0600 UTC"
DATETIME_FORMAT = '%A, %B %d, %Y %I:%M %p %z %Z'
# Set initial variables for City, etc
calendar_url = 'https://www.leg.mn.gov/cal?type=all'
@@ -76,8 +76,8 @@ for hE in houseEvents:
show_title = title,
show_link = link,
show_date = datetime.strptime(dateTime, DATETIME_FORMAT),
show_day = datetime.strptime(dateTime, DATETIME_FORMAT).date(),
more_details = details['location'],
show_day = datetime.strptime(dateTime + tz_str, DATETIME_FORMAT).date(),
# more_details = details['location'],
venue = venue,
scraper = scraper
)
@@ -113,8 +113,8 @@ for sE in senateEvents:
show_title = title,
show_link = link,
show_date = datetime.strptime(dateTime, DATETIME_FORMAT),
show_day = datetime.strptime(dateTime, DATETIME_FORMAT).date(),
more_details = details['location'],
show_day = datetime.strptime(dateTime + tz_str, DATETIME_FORMAT).date(),
# more_details = details['location'],
venue = venue,
scraper = scraper
)
@@ -147,8 +147,8 @@ for cE in commEvents:
show_title = title,
show_link = link,
show_date = datetime.strptime(dateTime, DATETIME_FORMAT),
show_day = datetime.strptime(dateTime, DATETIME_FORMAT).date(),
more_details = details['location'],
show_day = datetime.strptime(dateTime + tz_str, DATETIME_FORMAT).date(),
# more_details = details['location'],
venue = venue,
scraper = scraper
)

View File

@@ -49,6 +49,8 @@ gcal = iCalendar.from_ical(objIcalData.text)
cfpa_events = []
tz = pytz.timezone("US/Central")
tz_str = " -0600 UTC"
for component in gcal.walk():
event = {}
@@ -84,8 +86,8 @@ for component in gcal.walk():
event_type = event_type,
show_title = event['strSummary'],
show_link = venue.website,
show_date = event['dateStart']-td,
show_day = event['dateStart']-td,
show_date = event['dateStart'],
show_day = event['dateStart'],
more_details = event["details"],
venue = venue
)

View File

@@ -36,11 +36,15 @@ except Exception as e:
scraper = Scraper.objects.get(name="Mpls City Council")
print("Scraper: ", scraper)
DATETIME_FORMAT = '%Y-%m-%dT%H:%M:%S'
DATETIME_FORMAT = '%Y-%m-%dT%H:%M:%S %z %Z'
tz = pytz.timezone("US/Central")
td = timedelta(days=2)
odt = datetime.now() - td
tz_str = " -0600 UTC"
month = odt.strftime("%b")
day = int(datetime.now().day)
@@ -74,7 +78,7 @@ for event in new_events:
e = {}
e['title'] = event['CommitteeName']
e['link'] = scraper.website
e['dateStamp'] = datetime.strptime(event['MeetingTime'], DATETIME_FORMAT)
e['dateStamp'] = datetime.strptime(event['MeetingTime'] + tz_str, DATETIME_FORMAT)
e['agendaStatus'] = event['AgendaStatus']
e['address'] = event['Address']
e['description'] = event['Description']

View File

@@ -32,8 +32,9 @@ except Exception as e:
print("Scraper: ", scraper)
tz = pytz.timezone("US/Central")
tz_str = " -0600 UTC"
DATETIME_FORMAT = '%B %d, %Y at %I:%M %p'
DATETIME_FORMAT = '%B %d, %Y at %I:%M %p %z %Z'
calendar_url = 'https://www.stpaul.gov/calendar'
city_site = "https://www.stpaul.gov"
@@ -64,7 +65,7 @@ def getEvents(br):
event_type = 'Gv',
show_title = title,
show_link = link,
show_date = datetime.strptime(dateTime, DATETIME_FORMAT),
show_date = datetime.strptime(dateTime + tz_str, DATETIME_FORMAT),
show_day = datetime.strptime(dateTime, DATETIME_FORMAT),
venue = venue,
scraper = scraper

View File

@@ -20,14 +20,16 @@ venue, created = Organization.objects.get_or_create(
is_venue=True
)
scraper,item_count_start,virtcal = digitools.getScraper(venue, 'pdx')
scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'pdx')
event_type = "Gv"
# Time Signatures
tz = pytz.timezone("US/Eastern")
DATETIME_FORMAT = '%B %d, %Y %I:%M %p'
DATETIME_FORMAT_2 = '%B %d, %Y'
tz_str = " -0800 UTC"
DATETIME_FORMAT = '%B %d, %Y %I:%M %p %z %Z'
DATETIME_FORMAT_2 = '%B %d, %Y %z %Z'
def get_events(ps, event_type):
print("Getting events ...")
@@ -44,9 +46,9 @@ def get_events(ps, event_type):
event['time'] = c.xpath('.//*/span[@class="pe-4"]/text()')[0].replace("\n", "").strip()
# event['time2'] = c.xpath('.//*/span[@class="pe-4"]/text()')
try:
event['dateStamp'] = datetime.strptime(event['date']+" "+event['time'], DATETIME_FORMAT)
event['dateStamp'] = datetime.strptime(event['date']+" "+event['time'] + tz_str, DATETIME_FORMAT)
except:
event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT_2)
event['dateStamp'] = datetime.strptime(event['date'] + tz_str, DATETIME_FORMAT_2)
# ppr(event)

View File

@@ -20,13 +20,14 @@ venue, created = Organization.objects.get_or_create(
is_venue=True
)
scraper,item_count_start,virtcal = digitools.getScraper(venue, 'phl')
scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'phl')
event_type = "Gv"
# Time Signatures
tz = pytz.timezone("US/Eastern")
DATETIME_FORMAT = '%m/%d/%Y %I:%M %p'
tz_str = " -0500 UTC"
DATETIME_FORMAT = '%m/%d/%Y %I:%M %p %z %Z'
DATETIME_FORMAT_2 = '%B %d, %Y'
def get_events(ps, event_type):
@@ -41,7 +42,7 @@ def get_events(ps, event_type):
event['title'] = c.xpath('.//*/a/text()')[0]
event['time'] = c.xpath('.//*/span/text()')[2]
event['date'] = c.xpath('.//td[@class="rgSorted"]/text()')[0]
event['dateStamp'] = datetime.strptime(event['date']+" "+event['time'], DATETIME_FORMAT)
event['dateStamp'] = datetime.strptime(event['date']+" "+event['time'] + tz_str, DATETIME_FORMAT)
# ppr(event)
digitools.createBasicEvent(event, event_type, venue)
scraper.items+=1

View File

@@ -0,0 +1,53 @@
import requests, os, sys
from icalendar import Calendar as iCalendar, Event
from datetime import datetime, timedelta
from dateutil import relativedelta
td = relativedelta.relativedelta(hours=5)
from pprint import pprint as ppr
import pytz
sys.path.append('/var/www/digisnaxx.ado/scrapers')
import dtss
dtss.getReady()
from events.models import Event as DSEvent, Organization, Scraper, Calendar
import events.digitools as digitools
# Venue record for the Yoga MDE Google-calendar feed.
venue, created = Organization.objects.get_or_create(
    name="Yoga MDE",
    city="Medellin",
    website="https://digisnaxx.com/",
)
try:
    # Scraper row pointing at the public Google Calendar ICS feed.
    scraper, created = Scraper.objects.get_or_create(
        name="Yoga MDE",
        website="https://calendar.google.com/calendar/ical/721a61e06e9a96b7bd30cdb7439aa7791829c316bb8b77237ea93cf41d5396b4%40group.calendar.google.com/public/basic.ics",
        calendar = Calendar.objects.get(shortcode='mde'),
        items = 0,
        new_items = 0,
        last_ran = datetime.now(),
    )
except Exception as e:
    # get_or_create can raise when a matching row exists with different
    # field values; fall back to looking the scraper up by name.
    print(e)
    scraper = Scraper.objects.get(name=venue.name)
item_count_start = scraper.items
# Download and parse the ICS feed, then expand repeating events into the
# "mde" calendar as type "Ex" and persist updated scraper statistics.
objIcalData = requests.get(scraper.website)
gcal = iCalendar.from_ical(objIcalData.text.strip())
counter = 0
digitools.getiCalRepeateEvents(gcal, scraper, venue, "Ex", "mde")
digitools.updateScraper(scraper, item_count_start)
# Prune events outside the display window: older than yesterday or more
# than 45 days in the future.
new_time = datetime.now() - timedelta(days=1)
right_bound_time = datetime.now() + timedelta(days=45)
events = DSEvent.objects.filter(show_date__lte=new_time)
events1 = DSEvent.objects.filter(show_date__gte=right_bound_time)
for e in events:
    e.delete()
for e in events1:
    e.delete()

View File

@@ -0,0 +1,55 @@
import requests, os, sys
from icalendar import Calendar as iCalendar, Event
from datetime import datetime, timedelta
from dateutil import relativedelta
td = relativedelta.relativedelta(hours=5)
from pprint import pprint as ppr
import pytz
sys.path.append('/var/www/digisnaxx.ado/scrapers')
import dtss
dtss.getReady()
from events.models import Event as DSEvent, Organization, Scraper, Calendar
import events.digitools as digitools
# Venue record for idioki.
venue, created = Organization.objects.get_or_create(
    name="idioki",
    city="Medellin",
    website="https://idioki.com/",
)
try:
    scraper, created = Scraper.objects.get_or_create(
        # NOTE(review): name "Yoga MDE" looks copy-pasted from the Yoga MDE
        # scraper; the fallback below looks the scraper up by venue.name
        # ("idioki"), which can never match this row — confirm intended name.
        name="Yoga MDE",
        # NOTE(review): empty website means requests.get(scraper.website)
        # below will fail — TODO supply the ICS feed URL.
        website="",
        calendar = Calendar.objects.get(shortcode='mde'),
        items = 0,
        new_items = 0,
        last_ran = datetime.now(),
    )
except Exception as e:
    print(e)
    scraper = Scraper.objects.get(name=venue.name)
# NOTE(review): event_type is unused — events are created as "Ed" below.
event_type = "Mu"
item_count_start = scraper.items
# Download and parse the ICS feed, expand repeating events into the "mde"
# calendar as type "Ed", then persist updated scraper statistics.
objIcalData = requests.get(scraper.website)
gcal = iCalendar.from_ical(objIcalData.text)
counter = 0
digitools.getiCalRepeateEvents(gcal, scraper, venue, "Ed", "mde")
digitools.updateScraper(scraper, item_count_start)
# Prune events outside the display window: older than yesterday or more
# than 45 days in the future.
new_time = datetime.now() - timedelta(days=1)
right_bound_time = datetime.now() + timedelta(days=45)
events = DSEvent.objects.filter(show_date__lte=new_time)
events1 = DSEvent.objects.filter(show_date__gte=right_bound_time)
for e in events:
    e.delete()
for e in events1:
    e.delete()

View File

@@ -1,7 +1,7 @@
import requests, os, sys
from icalendar import Calendar as iCalendar, Event
from datetime import datetime
from datetime import datetime, timedelta
from dateutil import relativedelta
td = relativedelta.relativedelta(hours=5)
@@ -34,13 +34,11 @@ except Exception as e:
print(e)
scraper = Scraper.objects.get(name=venue.name)
event_type = "Mu"
item_count_start = scraper.items
objIcalData = requests.get(scraper.website)
gcal = iCalendar.from_ical(objIcalData.text)
counter = 0
digitools.getiCalRepeateEvents(gcal, scraper, venue, "Ed", cal)
digitools.getiCalRepeateEvents(gcal, scraper, venue, "Ed", "mde")
digitools.updateScraper(scraper, item_count_start)
new_time = datetime.now() - timedelta(days=1)

View File

@@ -1,7 +1,7 @@
import requests, os, sys
from icalendar import Calendar as iCalendar, Event
from datetime import datetime
from datetime import datetime, timedelta
from dateutil import relativedelta
td = relativedelta.relativedelta(hours=5)
@@ -40,17 +40,18 @@ objIcalData = requests.get(scraper.website)
gcal = iCalendar.from_ical(objIcalData.text)
counter = 0
digitools.getiCalRepeateEvents(gcal, scraper, venue, "Ed", cal)
print("Got Gcal")
digitools.getiCalRepeateEvents(gcal, scraper, venue, "Ed", "000")
print("Got EVents")
digitools.updateScraper(scraper, item_count_start)
print("Updated")
# new_time = datetime.now() - timedelta(days=1)
# right_bound_time = datetime.now() + timedelta(days=45)
# events = DSEvent.objects.filter(show_date__lte=new_time)
# events1 = DSEvent.objects.filter(show_date__gte=right_bound_time)
new_time = datetime.now() - timedelta(days=1)
right_bound_time = datetime.now() + timedelta(days=45)
events = DSEvent.objects.filter(show_date__lte=new_time)
events1 = DSEvent.objects.filter(show_date__gte=right_bound_time)
# for e in events:
# e.delete()
for e in events:
e.delete()
for e in events1:
e.delete()
# for e in events1:
# e.delete()

View File

@@ -21,13 +21,19 @@ venue, created = Organization.objects.get_or_create(
website="https://chicityclerkelms.chicago.gov/Meetings/",
)
website="https://calendar.google.com/calendar/ical/chicagolegislativereference%40gmail.com/public/basic.ics",
website="https://calendar.google.com/calendar/ical/chicagolegislativereference%40gmail.com/public/basic.ics"
# website="https://calendar.google.com/calendar/ical/chicagolegislativereference%40gmail.com/public/basic.ics"
scraper,item_count_start,virtcal = digitools.getScraper(venue, website, 'chi')
print("Got Scraper")
event_type = "Gv"
item_count_start = scraper.items
objIcalData = requests.get(scraper.website)
objIcalData = requests.get(website)
print("Got Obj")
gcal = iCalendar.from_ical(objIcalData.text)
print("Got Gcal")
events = digitools.getiCalEvents(gcal, scraper, venue, event_type)
digitools.buildiCalEvents(events, event_type)
print("Got Events")
digitools.buildiCalEvents(events, event_type, scraper, venue)
digitools.updateScraper(scraper, item_count_start)

View File

@@ -32,5 +32,5 @@ objIcalData = requests.get(scraper.website)
gcal = iCalendar.from_ical(objIcalData.text)
events = digitools.getiCalEvents(gcal, scraper, venue, event_type)
digitools.buildiCalEvents(events, event_type)
digitools.buildiCalEvents(events, event_type, scraper, venue)
digitools.updateScraper(scraper, item_count_start)

View File

@@ -28,5 +28,5 @@ item_count_start = scraper.items
objIcalData = requests.get(scraper.website)
gcal = iCalendar.from_ical(objIcalData.text)
events = digitools.getiCalEvents(gcal, scraper, venue, event_type)
digitools.buildiCalEvents(events, event_type)
digitools.buildiCalEvents(events, event_type, scraper, venue)
digitools.updateScraper(scraper, item_count_start)

View File

@@ -28,5 +28,5 @@ item_count_start = scraper.items
objIcalData = requests.get(scraper.website)
gcal = iCalendar.from_ical(objIcalData.text)
events = digitools.getiCalEvents(gcal, scraper, venue, event_type)
digitools.buildiCalEvents(events, event_type)
digitools.buildiCalEvents(events, event_type, scraper, venue)
digitools.updateScraper(scraper, item_count_start)

View File

@@ -28,5 +28,5 @@ item_count_start = scraper.items
objIcalData = requests.get(scraper.website)
gcal = iCalendar.from_ical(objIcalData.text)
events = digitools.getiCalEvents(gcal, scraper, venue, "Mu")
digitools.buildiCalEvents(events, event_type)
digitools.buildiCalEvents(events, event_type, scraper, venue)
digitools.updateScraper(scraper, item_count_start)

View File

@@ -29,5 +29,5 @@ item_count_start = scraper.items
objIcalData = requests.get(scraper.website)
gcal = iCalendar.from_ical(objIcalData.text)
events = digitools.getiCalEvents(gcal, scraper, venue, event_type)
digitools.buildiCalEvents(events, event_type)
digitools.buildiCalEvents(events, event_type, scraper, venue)
digitools.updateScraper(scraper, item_count_start)

View File

@@ -28,5 +28,5 @@ item_count_start = scraper.items
objIcalData = requests.get(scraper.website)
gcal = iCalendar.from_ical(objIcalData.text)
events = digitools.getiCalEvents(gcal, scraper, venue, event_type)
digitools.buildiCalEvents(events, event_type)
digitools.buildiCalEvents(events, event_type, scraper, venue)
digitools.updateScraper(scraper, item_count_start)

View File

@@ -28,6 +28,9 @@ scraper,item_count_start,virtcal = digitools.getScraper(venue, website, 'msp')
item_count_start = scraper.items
objIcalData = requests.get(scraper.website)
gcal = iCalendar.from_ical(objIcalData.content)
print("Got gcal")
events = digitools.getiCalEvents(gcal, scraper, venue, event_type)
digitools.buildiCalEvents(events, event_type)
print("Got Events")
digitools.buildiCalEvents(events, event_type, scraper, venue)
print("Built Events")
digitools.updateScraper(scraper, item_count_start)

View File

@@ -0,0 +1,86 @@
import os, sys
from datetime import datetime, timedelta
from dateutil import relativedelta
sys.path.append('/var/www/digisnaxx.ado/scrapers')
import dtss
dtss.getReady()
from time import sleep
from pprint import pprint as ppr
import pytz
from selenium.webdriver.common.by import By
from events.models import Organization, Scraper, Calendar, Event
import events.digitools as digitools
tz_str = "+0100 UTC"
DATETIME_FORMAT = '%B %d %Y %H:%M %z %Z'
venue, created = Organization.objects.get_or_create(
name="Arena Wien",
city="Vienna",
website="https://arena.wien/Home/Programm",
is_venue = True
)
scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'vie')
scraper.items = 0
scraper.save()
def getSite(br, website):
    """Load *website* in browser *br*, scrape its events as music, then pause."""
    page_source = digitools.getSource(br, website)
    get_events(page_source, "Mu")
    sleep(3)
def get_events(ps, event_type):
    """Scrape Arena Wien's programme grid and create a music event per row.

    event_type is accepted for interface parity with the sibling scrapers but
    events are always created as "Mu". Reads module globals: scraper, venue,
    tz_str, DATETIME_FORMAT, digitools.
    """
    contents = ps.xpath('.//*/div[@class="suite_calRowContainer "]')
    for c in contents:
        try:
            event = {}
            date = c.xpath('.//*/span[@class="suite_datePlate"]/span/text()')
            time = c.xpath('.//*/span[@class="col-md-2 suite_EvenTime"]/span/text()')
            # Third span holds the start time once layout whitespace is stripped.
            time = [x.replace("\n", "").strip() for x in time][2]
            # Date plate layout assumed: [day, ?, "month.|year"] — the third
            # span splits on "|" into month (dots removed) and year.
            day, month, year = date[0].strip(), date[2].split("|")[0].replace(".", "").strip(), date[2].split("|")[1].strip()
            # translateMonth presumably maps the (German) month name to one
            # strptime's %B accepts — confirm in digitools.
            month = digitools.translateMonth(month)
            event['date'] = (' ').join([month, day, year, time, tz_str])
            event['scraper'] = scraper
            event['calendars'] = [scraper.calendar]
            event['title'] = c.xpath('.//*/span[@class="Event_H1"]/text()')[0]
            try:
                event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
            except Exception as e:
                # Unparseable date: event is still created, without dateStamp.
                print(e)
                pass
            event['link'] = c.xpath('.//a/@href')[0]
            digitools.createBasicEvent(event, "Mu", venue)
            scraper.items+=1
        except Exception as e:
            # Malformed rows are logged and skipped; scraping continues.
            print("\nError: ", e)
            ppr(event)
            # print("\n+++\n")
            pass
# Entry point: a run-environment argument is required to obtain a browser.
if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    # Fix: the original called br.close() here, but `br` is never assigned
    # on this branch, so it raised NameError instead of exiting cleanly.
    print("No run_env")
    quit()
# Scrape the current month's and the following month's programme pages.
month = datetime.now().month
year = datetime.now().year
odt_next_month = datetime.now() + relativedelta.relativedelta(months=1)
next_month = odt_next_month.month
website = "https://arena.wien/Home/Programm#data_abonnement=-1&data_month={}&data_year={}".format(str(month), str(year))
website_2 = "https://arena.wien/Home/Programm#data_abonnement=-1&data_month={}&data_year={}".format(str(next_month), str(year))
getSite(br,website)
getSite(br,website_2)
# Persist updated scraper statistics and release the browser.
digitools.updateScraper(scraper, item_count_start)
br.close()

View File

@@ -0,0 +1,81 @@
import os, sys
from datetime import datetime, timedelta
from dateutil import relativedelta
sys.path.append('/var/www/digisnaxx.ado/scrapers')
import dtss
dtss.getReady()
from time import sleep
from pprint import pprint as ppr
import pytz
from selenium.webdriver.common.by import By
from events.models import Organization, Scraper, Calendar, Event
import events.digitools as digitools
tz_str = "+0100 UTC"
DATETIME_FORMAT = '%B %d %Y %H:%M %z %Z'
venue, created = Organization.objects.get_or_create(
name="City Site",
city="Vienna",
website="https://www.wien.gv.at/",
is_venue = True
)
scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'vie')
scraper.items = 0
scraper.save()
def getSite(br, website):
    """Fetch *website* with browser *br* and harvest its events as music."""
    source_tree = digitools.getSource(br, website)
    get_events(source_tree, "Mu")
    sleep(3)
def get_events(ps, event_type):
    """Scrape wm-card listings from the Vienna city site (work in progress)."""
    contents = ps.xpath('.//*/wm-card')
    for c in contents:
        try:
            event = {}
            date = c.xpath('.//*/ul/li/text()')
            # time = c.xpath('.//*/span[@class="col-md-2 suite_EvenTime"]/span/text()')
            # time = [x.replace("\n", "").strip() for x in time][2]
            # day, month, year = date[0].strip(), date[2].split("|")[0].replace(".", "").strip(), date[2].split("|")[1].strip()
            # month = digitools.translateMonth(month)
            # NOTE(review): month, day, year and time are never assigned in
            # this function (their parsing lines above are commented out), so
            # the join below raises NameError on every card and the outer
            # except skips every event — this scraper currently stores nothing.
            event['date'] = (' ').join([month, day, year, time, tz_str])
            event['scraper'] = scraper
            event['calendars'] = [scraper.calendar]
            event['title'] = c.xpath('.//*/h3/a/text()')[0]
            try:
                event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
            except Exception as e:
                print(e)
                pass
            event['link'] = c.xpath('.//a/@href')[0]
            digitools.createBasicEvent(event, "Mu", venue)
            scraper.items+=1
        except Exception as e:
            # Malformed cards are logged and skipped.
            print("\nError: ", e)
            ppr(event)
            # print("\n+++\n")
            pass
if len(sys.argv) >= 2:
arg1 = sys.argv[1]
br = digitools.getBrowser(arg1)
else:
print("No run_env")
br.close()
quit()
site_append = "veranstaltungen/suche?q=&facet={}&page={}".format('this month', 1)
website = venue.website + site_append
getSite(br,website)
# getSite(br,website_2)
digitools.updateScraper(scraper, item_count_start)
br.close()

View File

@@ -0,0 +1,77 @@
import os, sys
from datetime import datetime, timedelta
from dateutil import relativedelta
sys.path.append('/var/www/digisnaxx.ado/scrapers')
import dtss
dtss.getReady()
from time import sleep
from pprint import pprint as ppr
import pytz
from selenium.webdriver.common.by import By
from events.models import Organization, Scraper, Calendar, Event
import events.digitools as digitools
tz_str = "+0100 UTC"
DATETIME_FORMAT = '%b %d %Y %H:%M %z %Z'
venue, created = Organization.objects.get_or_create(
name="Flex",
city="Vienna",
website="https://flex.at/",
is_venue = True
)
scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'vie')
scraper.items = 0
scraper.save()
def getSite(br, website):
ps = digitools.getSource(br, website)
get_events(ps, "Mu")
sleep(3)
def get_events(ps, event_type):
    """Scrape Flex's events-table widget and create a music event per entry.

    event_type is accepted for interface parity but events are always created
    as "Mu". Reads module globals: scraper, venue, tz_str, DATETIME_FORMAT,
    digitools.
    """
    contents = ps.xpath('.//*/div[@class="ectbe-inner-wrapper ectbe-simple-event "]')
    for c in contents:
        try:
            event = {}
            date = c.xpath('.//div[@class="ectbe-date-wrp elementor-repeater-item-dd81c19"]/span/text()')
            # Start time is the left half of a "start - end" range.
            time = c.xpath('.//*/div[@class="ectbe-evt-time elementor-repeater-item-fd61cbc"]/text()')[0].split('-')[0].strip()
            # Listing omits the year; assume the current one.
            year = datetime.now().year
            # NOTE(review): only "März" is patched to an English abbreviation —
            # other German month abbreviations (e.g. "Dez", "Okt", "Mai") would
            # still fail strptime's %b; confirm the site's locale.
            if date[1] == 'März':
                date[1] = "Mar"
            event['date'] = (' ').join([date[1].replace(".", ""), date[0], str(year), time, tz_str])
            event['scraper'] = scraper
            event['calendars'] = [scraper.calendar]
            event['title'] = c.xpath('.//*/h2/text()')[0]
            try:
                event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
            except Exception as e:
                # Unparseable date: event is still created, without dateStamp.
                print(e)
                pass
            event['link'] = c.xpath('.//a[@class="ectbe-evt-read-more"]/@href')[0]
            digitools.createBasicEvent(event, "Mu", venue)
            scraper.items+=1
            # ppr(event)
        except Exception as e:
            # Malformed entries are logged and skipped.
            print("\nError: ", e)
            ppr(event)
            # print("\n+++\n")
            pass
if len(sys.argv) >= 2:
arg1 = sys.argv[1]
br = digitools.getBrowser(arg1)
else:
print("No run_env")
br.close()
quit()
getSite(br, venue.website)
digitools.updateScraper(scraper, item_count_start)
br.close()

View File

@@ -0,0 +1,66 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta
sys.path.append('/var/www/digisnaxx.ado/scrapers')
import dtss
dtss.getReady()
from time import sleep
from pprint import pprint as ppr
import pytz
from events.models import Organization, Scraper, Calendar, Event
import events.digitools as digitools
tz_str = "-0600 UTC"
DATETIME_FORMAT = '%a %B %d %Y %I:%M%p %z %Z'
venue, created = Organization.objects.get_or_create(
name="Empty Bottle",
city="Chicago",
website="https://www.emptybottle.com/",
is_venue = True
)
scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'chi')
scraper.items = 0
scraper.save()
def get_events(ps, event_type):
    """Scrape the Empty Bottle listing page and store its in-house shows."""
    for item in ps.xpath('.//*/div[@class="eb-item"]'):
        try:
            date_text = item.xpath('.//*/div[@class="date"]/text()')[0]
            time_text = item.xpath('.//*/div[@class="start-time"]/text()')[0]
            # The listing omits the year; assume the current one.
            current_year = datetime.now().year
            event = {}
            event['date'] = ' '.join([date_text, str(current_year), time_text, tz_str])
            event['scraper'] = scraper
            event['calendars'] = [scraper.calendar]
            event['title'] = item.xpath('.//*/div[@class="title"]/text()')[0]
            event['venue'] = item.xpath('.//*/a[@class="venue"]/text()')[0]
            event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
            event['link'] = venue.website
            # Only rows whose venue text is exactly "Empty Bottle" are stored.
            if event['venue'] == 'Empty Bottle':
                digitools.createBasicEvent(event, "Mu", venue)
                scraper.items += 1
        except Exception as err:
            # Malformed rows are logged and skipped.
            print("\nError: ", err)
            pass
if len(sys.argv) >= 2:
arg1 = sys.argv[1]
br = digitools.getBrowser(arg1)
else:
print("No run_env")
br.close()
quit()
ps = digitools.getSource(br, venue.website)
get_events(ps, "Mu")
sleep(3)
digitools.updateScraper(scraper, item_count_start)
br.close()

View File

@@ -0,0 +1,68 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta
sys.path.append('/var/www/digisnaxx.ado/scrapers')
import dtss
dtss.getReady()
from time import sleep
from pprint import pprint as ppr
import pytz
from events.models import Organization, Scraper, Calendar, Event
import events.digitools as digitools
tz = pytz.timezone("US/Central")
tz_str = "-0600 UTC"
DATETIME_FORMAT = '%a, %b %d, %Y %I:%M%p %z %Z'
DATETIME_FORMAT_2 = '%a, %b %d, %Y %I %p %z %Z'
venue, created = Organization.objects.get_or_create(
name="Hideout - Chicago",
city="Chicago",
website="https://hideoutchicago.com/",
is_venue = True
)
scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'chi')
scraper.items = 0
scraper.save()
def get_events(ps, event_type):
    """Scrape Hideout Chicago's event list and create a music event per row.

    event_type is accepted for interface parity but events are always created
    as "Mu". Reads module globals: scraper, venue, tz_str, DATETIME_FORMAT,
    DATETIME_FORMAT_2, digitools.
    """
    contents = ps.xpath('.//*/div[@class="col-12 eventWrapper rhpSingleEvent py-4 px-0 rhp-event__single-event--list"]')
    for c in contents:
        try:
            event = {}
            # Date and doors-time text with layout whitespace stripped; the
            # time is the part after "//" minus the "Doors: " prefix.
            date = c.xpath('.//*/div[@id="eventDate"]/text()')[0].replace("\n", "").replace("\t", "")
            time = c.xpath('.//*/span[@class="font0by75 fontWeight500 lineHeight15 rhp-event__time-text--list"]/text()')[0].split("//")[1].replace("Doors: ", "").replace("\n", "").replace("\t", "")
            event['date'] = (' ').join([date, time, tz_str])
            event['scraper'] = scraper
            event['calendars'] = [scraper.calendar]
            event['title'] = c.xpath('.//*/a[@id="eventTitle"]/h2/text()')[0].replace("\n", "").replace("\t", "")
            # Times appear in two shapes ("%I:%M%p" vs "%I %p"); try both.
            try:
                event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
            except:
                event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT_2)
            event['link'] = c.xpath('.//*/a[@id="eventTitle"]/@href')[0]
            digitools.createBasicEvent(event, "Mu", venue)
            scraper.items+=1
        except Exception as e:
            # Malformed rows are logged and skipped.
            print("\nError: ", e)
            # ppr(event)
            pass
if len(sys.argv) >= 2:
arg1 = sys.argv[1]
br = digitools.getBrowser(arg1)
else:
print("No run_env")
br.close()
quit()
ps = digitools.getSource(br, venue.website)
get_events(ps, "Mu")
sleep(3)
digitools.updateScraper(scraper, item_count_start)
br.close()

View File

@@ -0,0 +1,76 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta
sys.path.append('/var/www/digisnaxx.ado/scrapers')
import dtss
dtss.getReady()
from time import sleep
from pprint import pprint as ppr
import pytz
from events.models import Organization, Scraper, Calendar, Event
import events.digitools as digitools
tz = pytz.timezone("US/Central")
tz_str = "-0600 UTC"
DATETIME_FORMAT = '%Y-%m-%d %I:%M %p %z %Z'
# DATETIME_FORMAT_2 = '%a, %b %d, %Y %I %p %z %Z'
venue, created = Organization.objects.get_or_create(
name="Reggies",
city="Chicago",
website="https://www.reggieslive.com/",
is_venue = True
)
scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'chi')
scraper.items = 0
scraper.save()
def get_events(ps, event_type):
    """Scrape Reggies' article list; categorize events by the "presented by" text.

    event_type is accepted for interface parity but the category is chosen
    below. Reads module globals: scraper, venue, tz_str, DATETIME_FORMAT,
    digitools.
    """
    contents = ps.xpath('.//*/article')
    for c in contents:
        try:
            event = {}
            date = c.xpath('.//*/time/@datetime')[0]
            time = c.xpath('.//*/li[@class="first"]/text()')[0]
            presented_by = c.xpath('.//*/p[@class="presented-by"]/text()')[0]
            event['date'] = (' ').join([date, time, tz_str])
            event['scraper'] = scraper
            event['calendars'] = [scraper.calendar]
            event['title'] = c.xpath('.//*/hgroup/h2/text()')[0]
            # First attempt parses the raw time text; on failure retry with
            # the "Doors" prefix stripped.
            try:
                event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
            except:
                event['date'] = (' ').join([date, time.replace("Doors", "").strip(), tz_str])
                event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
            event['link'] = venue.website
            # "omedy" matches both "Comedy" and "comedy".
            if "omedy" in presented_by:
                digitools.createBasicEvent(event, "Co", venue)
            # NOTE(review): game/bingo/Sports events are also stored as "Co"
            # — confirm whether a different type code was intended here.
            elif any(word in presented_by for word in ["game", "bingo", "Sports"]):
                digitools.createBasicEvent(event, "Co", venue)
            else:
                digitools.createBasicEvent(event, "Mu", venue)
            # NOTE(review): unlike the sibling scrapers, scraper.items is never
            # incremented here — confirm whether these events should be counted.
        except Exception as e:
            # Malformed articles are logged and skipped.
            print("\nError: ", e)
            # ppr(event)
            pass
if len(sys.argv) >= 2:
arg1 = sys.argv[1]
br = digitools.getBrowser(arg1)
else:
print("No run_env")
br.close()
quit()
themes = []
ps = digitools.getSource(br, venue.website)
get_events(ps, "Mu")
sleep(1)
digitools.updateScraper(scraper, item_count_start)
br.close()

View File

@@ -0,0 +1,73 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta
sys.path.append('/var/www/digisnaxx.ado/scrapers')
import dtss
dtss.getReady()
from time import sleep
from pprint import pprint as ppr
import pytz
from events.models import Organization, Scraper, Calendar, Event
import events.digitools as digitools
tz = pytz.timezone("US/Central")
tz_str = "-0600 UTC"
DATETIME_FORMAT = '%a %b %d %Y %I:%M %p %z %Z'
# DATETIME_FORMAT_2 = '%a, %b %d, %Y %I %p %z %Z'
venue, created = Organization.objects.get_or_create(
name="Subterranean",
city="Chicago",
website="https://subt.net/",
is_venue = True
)
scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'chi')
scraper.items = 0
scraper.save()
def get_events(ps, event_type):
    """Scrape Subterranean's listing page and create a music event per show.

    event_type is accepted for interface parity but events are always created
    as "Mu". Reads module globals: scraper, venue, tz_str, DATETIME_FORMAT,
    digitools.
    """
    contents = ps.xpath('.//*/div[@class="event-info-block"]')
    for c in contents:
        try:
            event = {}
            date = c.xpath('.//p[@class="fs-18 bold mt-1r event-date"]/text()')[0]
            time = c.xpath('.//*/span[@class="door-time"]/text()')[0].replace("Doors: ", "").replace("/", "").strip()
            # Listing omits the year; assume the current one.
            year = datetime.now().year
            event['date'] = (' ').join([date, str(year), time, tz_str])
            event['scraper'] = scraper
            event['calendars'] = [scraper.calendar]
            event['title'] = c.xpath('.//p[@class="fs-18 bold mb-12 event-title"]/a/text()')[0]
            # The supporting act is optional. Default it to "" so the length
            # check below cannot raise KeyError — previously every show
            # without a supporting-talent line was silently dropped by the
            # outer except.
            try:
                event['support'] = c.xpath('.//p[@class="fs-12 supporting-talent"]/text()')[0]
            except IndexError:
                event['support'] = ''
            event['venue'] = c.xpath('.//p[@class="fs-12 venue"]/text()')[0]
            event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
            event['link'] = venue.website
            if len(event['support']) > 0:
                event['new_title'] = event['title'] + " w/ " + event['support']
            digitools.createBasicEvent(event, "Mu", venue)
            scraper.items+=1
        except Exception as e:
            # Malformed entries are logged and skipped.
            print("\nError: ", e)
            # ppr(event)
            pass
if len(sys.argv) >= 2:
arg1 = sys.argv[1]
br = digitools.getBrowser(arg1)
else:
print("No run_env")
br.close()
quit()
ps = digitools.getSource(br, venue.website)
get_events(ps, "Mu")
sleep(3)
digitools.updateScraper(scraper, item_count_start)
br.close()

View File

@@ -0,0 +1,67 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta
sys.path.append('/var/www/digisnaxx.ado/scrapers')
import dtss
dtss.getReady()
from time import sleep
from pprint import pprint as ppr
import pytz
from events.models import Organization, Scraper, Calendar, Event
import events.digitools as digitools
tz_str = "-0600 UTC"
DATETIME_FORMAT = '%a %B %d %Y %I:%M%p %z %Z'
venue, created = Organization.objects.get_or_create(
name="Thalia Hall",
city="Chicago",
website="https://www.thaliahallchicago.com/",
is_venue = True
)
scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'chi')
scraper.items = 0
scraper.save()
def get_events(ps, event_type):
    """Pull event cards off the Thalia Hall listing page and store the
    shows actually hosted at Thalia Hall as "Mu" (music) events.

    Uses the module-level ``scraper``, ``venue``, ``tz_str`` and
    ``DATETIME_FORMAT`` globals.
    """
    cards = ps.xpath('.//*/div[@class="eb-item"]')
    this_year = datetime.now().year
    for card in cards:
        try:
            raw_date = card.xpath('.//*/div[@class="date"]/text()')[0]
            doors = card.xpath('.//*/div[@class="start-time"]/text()')[0].replace("Doors: ", "")
            event = {
                'date': " ".join([raw_date, str(this_year), doors, tz_str]),
                'scraper': scraper,
                'calendars': [scraper.calendar],
                'title': card.xpath('.//*/div[@class="title"]/text()')[0],
                'venue': card.xpath('.//*/a[@class="venue"]/text()')[0],
                'link': venue.website,
            }
            event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
            # The feed lists shows for several rooms; keep only the main hall.
            if event['venue'] == 'Thalia Hall':
                digitools.createBasicEvent(event, "Mu", venue)
                scraper.items += 1
        except Exception as exc:
            print("\nError: ", exc)
# --- entry point ------------------------------------------------------
# argv[1] selects the browser run environment passed to getBrowser().
if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    # No browser exists yet on this path; the old br.close() raised
    # NameError before quit().
    print("No run_env")
    quit()
ps = digitools.getSource(br, venue.website)
get_events(ps, "Mu")
sleep(3)
digitools.updateScraper(scraper, item_count_start)
br.close()

View File

@@ -0,0 +1,65 @@
# Improv Asylum (Boston) comedy calendar scraper.
import os, sys
from datetime import datetime
from dateutil import relativedelta
# Make the project bootstrap importable, then configure Django before any
# events.* imports.
sys.path.append('/var/www/digisnaxx.ado/scrapers')
import dtss
dtss.getReady()
from time import sleep
from pprint import pprint as ppr
import pytz
from events.models import Organization, Scraper, Calendar, Event
import events.digitools as digitools
# Fixed UTC offset appended to scraped date strings; parsed by the
# %z %Z directives below (-0500 = US Eastern standard offset).
tz_str = "-0500 UTC"
DATETIME_FORMAT = '%A, %B %d, %Y %I:%M %p %z %Z'
# Ensure the venue row exists, then fetch this run's Scraper record plus
# the starting item count used by updateScraper() at the end of the run.
venue, created = Organization.objects.get_or_create(
    name="Asylum Comedy",
    city="Boston",
    website="https://calendar.improvasylum.com/",
    is_venue = True
)
scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'bos')
# Reset the per-run counter so only this run's items are counted.
scraper.items = 0
scraper.save()
def get_events(ps, event_type):
    """Scrape Improv Asylum calendar cards and store each as a "Co" event.

    Args:
        ps: lxml HTML tree of the calendar page.
        event_type: unused; events are created with the hard-coded "Co" type.

    Relies on the module-level ``scraper``, ``venue``, ``tz_str`` and
    ``DATETIME_FORMAT`` globals.
    """
    contents = ps.xpath('.//*/div[@class="space-y-4"]')
    for c in contents:
        try:
            event = {}
            date = c.xpath('.//*/h3[@class="text-lg font-extrabold uppercase"]/text()')[0]
            # The first "flex items-center gap-2" <p> holds "start - end";
            # keep only the start time.  (The second <p>, the room name,
            # was previously extracted into an unused local.)
            time = c.xpath('.//*/p[@class="flex items-center gap-2"]/text()')[0].split("-")[0]
            event['scraper'] = scraper
            event['calendars'] = [scraper.calendar]
            event['title'] = c.xpath('.//*/h4[@class="text-lg font-extrabold transition-colors"]/text()')[0]
            event['date'] = " ".join([date, time, tz_str])
            event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
            event['link'] = venue.website
            digitools.createBasicEvent(event, "Co", venue)
            scraper.items += 1
            # Fixed the garbled "\nSuccessn" progress message.
            print("\nSuccess")
        except Exception as e:
            print("\nError: ", e)
            pass
# --- entry point ------------------------------------------------------
# argv[1] selects the browser run environment passed to getBrowser().
if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    # No browser exists yet on this path; the old br.close() raised
    # NameError before quit().
    print("No run_env")
    quit()
ps = digitools.getSource(br, venue.website)
get_events(ps, "Co")
sleep(3)
digitools.updateScraper(scraper, item_count_start)
br.close()

View File

@@ -0,0 +1,68 @@
# JP Centre Yoga (Boston) class-schedule scraper.
import os, sys
from datetime import datetime
from dateutil import relativedelta
# Make the project bootstrap importable, then configure Django before any
# events.* imports.
sys.path.append('/var/www/digisnaxx.ado/scrapers')
import dtss
dtss.getReady()
from time import sleep
from pprint import pprint as ppr
import pytz
from events.models import Organization, Scraper, Calendar, Event
import events.digitools as digitools
# Fixed UTC offset appended to scraped date strings; parsed by the
# %z %Z directives below (-0500 = US Eastern standard offset).
tz_str = "-0500 UTC"
DATETIME_FORMAT = '%A, %B %d %Y %I:%M %p %z %Z'
# Ensure the venue row exists, then fetch this run's Scraper record plus
# the starting item count used by updateScraper() at the end of the run.
venue, created = Organization.objects.get_or_create(
    name="JP Centre Yoga",
    city="Boston",
    website="https://www.jpcentreyoga.com/yoga-class-schedule",
    is_venue = True
)
scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'bos')
# Reset the per-run counter so only this run's items are counted.
scraper.items = 0
scraper.save()
def get_events(ps, event_type):
    """Walk each day block of the schedule widget and store every session
    as an "Ex" (exercise) event; online classes also get calendar "000".

    Args:
        ps: lxml HTML tree of the schedule page.
        event_type: unused; sessions are always created with type "Ex"
                    (note the caller passes "Co" — the argument is ignored).

    Relies on the module-level ``scraper``, ``venue``, ``tz_str`` and
    ``DATETIME_FORMAT`` globals.
    """
    contents = ps.xpath('.//*/div[@class="bw-widget__day"]')
    for cn in contents:
        try:
            # Day header text; guarded so one malformed day block cannot
            # abort the whole run (this lookup used to sit outside any try).
            date = cn.xpath('.//div[1]/text()')[0]
        except IndexError:
            continue
        events = cn.xpath('.//div[@class="bw-session"]')
        for c in events:
            try:
                event = {}
                time = c.xpath('.//*/time[@class="hc_starttime"]/text()')[0]
                event['scraper'] = scraper
                event['calendars'] = [scraper.calendar]
                event['title'] = c.xpath('.//*/div[@class="bw-session__name"]/text()')[1].replace("\n", "").strip()
                # The widget shows no year; assume the current one.
                year = datetime.now().year
                event['date'] = " ".join([date, str(year), time, tz_str])
                event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
                event['link'] = venue.website
                # Bind the persisted object to its own name instead of
                # rebinding the `event` dict (the old code shadowed it).
                new_event, created = digitools.createBasicEvent(event, "Ex", venue)
                if "o n l i n e" in new_event.show_title:
                    new_event = digitools.add_calendar(new_event, "000")
                scraper.items += 1
            except Exception as e:
                print("\nError: ", e)
                pass
# --- entry point ------------------------------------------------------
# argv[1] selects the browser run environment passed to getBrowser().
if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    # No browser exists yet on this path; the old br.close() raised
    # NameError before quit().
    print("No run_env")
    quit()
ps = digitools.getSource(br, venue.website)
# NOTE(review): "Co" is ignored — get_events hard-codes the "Ex" type.
get_events(ps, "Co")
sleep(3)
digitools.updateScraper(scraper, item_count_start)
br.close()

View File

@@ -0,0 +1,70 @@
# The Middle East (Boston/Cambridge) show scraper.
import os, sys
from datetime import datetime
from dateutil import relativedelta
# Make the project bootstrap importable, then configure Django before any
# events.* imports.
sys.path.append('/var/www/digisnaxx.ado/scrapers')
import dtss
dtss.getReady()
from time import sleep
from pprint import pprint as ppr
import pytz
from events.models import Organization, Scraper, Calendar, Event
import events.digitools as digitools
# Fixed UTC offset appended to scraped date strings; parsed by the
# %z %Z directives below (-0500 = US Eastern standard offset).
tz_str = "-0500 UTC"
DATETIME_FORMAT = '%m.%d %Y %I:%M%p %z %Z'
# Ensure the venue row exists, then fetch this run's Scraper record plus
# the starting item count used by updateScraper() at the end of the run.
venue, created = Organization.objects.get_or_create(
    name="The Middle East",
    city="Boston",
    website="https://mideastclub.com/",
    is_venue = True
)
scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'bos')
# Reset the per-run counter so only this run's items are counted.
scraper.items = 0
scraper.save()
def get_events(ps, event_type):
    """Scrape Ticketweb-style listing rows and store shows held at the
    Middle East itself as "Mu" events; rows for other rooms are skipped
    (every row is still pretty-printed for run-time inspection).

    Uses the module-level ``scraper``, ``venue``, ``tz_str`` and
    ``DATETIME_FORMAT`` globals.
    """
    rows = ps.xpath('.//*/div[@class="tw-section"]/div[@class="row"]')
    yr = str(datetime.now().year)
    for row in rows:
        try:
            show_date = row.xpath('.//*/span[@class="tw-event-date"]/text()')[0]
            show_time = row.xpath('.//*/span[@class="tw-event-time"]/text()')[0].replace("Show: ", "")
            event = {
                'scraper': scraper,
                'calendars': [scraper.calendar],
                'title': row.xpath('.//*/div[@class="tw-name"]/a/text()')[0],
                'details': row.xpath('.//*/span[@class="tw-venue-name"]/text()')[0].replace("\n", "").strip(),
                'date': " ".join([show_date, yr, show_time, tz_str]),
            }
            event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
            event['link'] = row.xpath('.//*/div[@class="tw-info-price-buy-tix"]/a/@href')[0]
            if "Middle East" in event['details']:
                digitools.createBasicEvent(event, "Mu", venue)
                scraper.items += 1
            ppr(event)
            print("\n+++\n")
        except Exception as exc:
            print("\nError: ", exc)
# --- entry point ------------------------------------------------------
# argv[1] selects the browser run environment passed to getBrowser().
if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    # No browser exists yet on this path; the old br.close() raised
    # NameError before quit().
    print("No run_env")
    quit()
# The listing is paginated; walk the first four pages.
links = ["https://mideastclub.com/page/1/", "https://mideastclub.com/page/2/", "https://mideastclub.com/page/3/", "https://mideastclub.com/page/4/"]
for link in links:
    ps = digitools.getSource(br, link)
    # NOTE(review): "Co" is ignored — get_events hard-codes the "Mu" type.
    get_events(ps, "Co")
digitools.updateScraper(scraper, item_count_start)
br.close()

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,5 +1,5 @@
import os, sys
from datetime import datetime
from datetime import datetime, timedelta
from dateutil import relativedelta
import json
@@ -12,9 +12,10 @@ dtss.getReady()
from time import sleep
from pprint import pprint as ppr
from zoneinfo import ZoneInfo
import pytz
from events.models import Organization, Scraper, Calendar
from events.models import Event, Organization, Scraper, Calendar
import events.digitools as digitools
venue, created = Organization.objects.get_or_create(
@@ -25,9 +26,14 @@ venue, created = Organization.objects.get_or_create(
)
scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'mde')
tz_str = " -0500 UTC"
DATETIME_FORMAT = '%d/%m/%y %I:%M %p %z %Z'
# DATETIME_FORMAT = '%B %d %Y %I:%M%p %z %Z'
# DATETIME_FORMAT = '%B %d %Y %I:%M %p %z %Z'
# DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y %z %Z'
DATETIME_FORMAT = '%B %d %Y %I:%M%p'
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'
td = timedelta(days=31)
future_date = datetime.now(ZoneInfo("America/Chicago")) + td
# with open('data.json') as f:
# totalLinks = json.load(f)
@@ -52,18 +58,18 @@ def getLinks(br, url, links):
# newlinks = ps.xpath('.//*/div[@class="e-con-inner"]/*/a/@href')
events = ps.xpath('.//*/div[@class="e-con-inner"]')
for event in events:
e = {}
ev = {}
try:
e['link'] = event.xpath('.//*/a/@href')[0]
e['title'] = event.xpath('.//*/h3/a/text()')[0]
e['venue'] = event.xpath('.//*/ul/li/a/text()')[-1:][0].replace('\n', '').replace('\t', '')
ev['link'] = event.xpath('.//*/a/@href')[0]
ev['title'] = event.xpath('.//*/h3/a/text()')[0]
ev['venue'] = event.xpath('.//*/ul/li/a/text()')[-1:][0].replace('\n', '').replace('\t', '')
# e['venue'] = event.xpath('.//*/ul/li/a/text()')[-1:][0]
e['venueLink'] = event.xpath('.//*/ul/li/a/@href')[1]
ev['venueLink'] = event.xpath('.//*/ul/li/a/@href')[1]
label= event.xpath('.//*/li[@class="elementor-repeater-item-46edd7d flexify ts-action"]/div/text()')
e['label'] = ''.join([x.replace('\t', '').replace('\n', '') for x in label]).strip()
newLinks.append(e)
ev['label'] = ''.join([x.replace('\t', '').replace('\n', '') for x in label]).strip()
newLinks.append(ev)
except Exception as e:
print("Error: ", e)
print("Error: ", ev, e)
links = links + newLinks
return links
@@ -89,35 +95,47 @@ ppr(len(totalLinks))
for event in totalLinks:
br.get(event['link'])
sleep(2)
sleep(1)
ps = html.fromstring(br.page_source)
dateTime= ps.xpath('.//*/div[@class="elementor-element elementor-element-d9beb21 elementor-widget elementor-widget-heading"]/span[@class="elementor-heading-title elementor-size-default"]/text()')
event['dateTime'] = [x[3:].split('-')[0].strip() for x in dateTime]
locations = ps.xpath('.//*/div[@class="elementor-element elementor-element-f04aae3 elementor-widget__width-initial elementor-widget-mobile__width-initial elementor-widget elementor-widget-ts-advanced-list"]/*/li[@class="elementor-repeater-item-138dbed flexify ts-action"]/a/text()')
location = [x.replace('\t', '').replace('\n', '') for x in locations]
if len(location) == 2:
event['location'] = "{0}, {1}".format(location[1], location[0])
else:
try:
event['location'] = location[0]
except:
event['location'] = 'n/a'
address= ps.xpath('.//*/ul[@class="flexify simplify-ul ts-advanced-list"]/li[@class="elementor-repeater-item-842568d flexify ts-action"]/div/text()')
try:
event['address'] = [x for x in address if 'Capacidad' not in x and '$' not in x][0]
except:
event['address'] = address
event['dateStamp'] = datetime.strptime(event['dateTime'][0] +tz_str, DATETIME_FORMAT)
if future_date <= event['dateStamp']:
print("Future Date")
pass
else:
locations = ps.xpath('.//*/div[@class="elementor-element elementor-element-f04aae3 elementor-widget__width-initial elementor-widget-mobile__width-initial elementor-widget elementor-widget-ts-advanced-list"]/*/li[@class="elementor-repeater-item-138dbed flexify ts-action"]/a/text()')
location = [x.replace('\t', '').replace('\n', '') for x in locations]
if len(location) == 2:
event['location'] = "{0}, {1}".format(location[1], location[0])
else:
try:
event['location'] = location[0]
except:
event['location'] = 'n/a'
address= ps.xpath('.//*/ul[@class="flexify simplify-ul ts-advanced-list"]/li[@class="elementor-repeater-item-842568d flexify ts-action"]/div/text()')
try:
event['address'] = [x for x in address if 'Capacidad' not in x and '$' not in x][0]
except:
event['address'] = address
except Exception as e:
print("Error: ", event, e)
pass
# ppr(event)
sleep(2)
sleep(1)
br.close()
data = totalLinks
data = [i for i in totalLinks if 'dateStamp' in i]
new_data = [i for i in data if 'location' in i]
print("Set:", len(totalLinks))
print("Data Set:", len(data))
print("New Data Set:", len(new_data))
paisa = []
for d in data:
for d in new_data:
if len(d['dateTime']) != 0:
if 'Familia' in d['label']:
d['category'] = 'Ot'
@@ -139,7 +157,6 @@ for d in data:
cal = Calendar.objects.get(shortcode='mde')
for d in paisa:
d['dateStamp'] =datetime.strptime(d['dateTime'][0], DATETIME_FORMAT)
try:
nvenue, created = Organization.objects.get_or_create(
name=d['venue'],
@@ -152,6 +169,7 @@ for d in paisa:
nvenue = Organization.objects.get(name=d['venue'])
nvenue.address_complete = d['address']
nvenue.save()
new_event, created = Event.objects.update_or_create(
event_type = d['category'],
show_title = d['title'],
@@ -163,6 +181,6 @@ for d in paisa:
)
new_event.calendar.add(cal)
new_event.save()
print(new_event)
# print("Success:", new_event)
digitools.updateScraper(scraper, item_count_start)

View File

@@ -13,9 +13,8 @@ import pytz
from events.models import Organization, Scraper, Calendar, Event
import events.digitools as digitools
tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%b %d %Y %I:%M %p'
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'
tz_str = " -0600 UTC"
DATETIME_FORMAT = '%b %d %Y %I:%M %p %z %Z'
venue, created = Organization.objects.get_or_create(
name="Acme Comedy Club",
@@ -43,14 +42,13 @@ def get_events(ps, event_type):
event['title'] = c.xpath('.//*/span[@class="event_title"]/a/text()')[0]
event['date'] = [month, day, str(year), c.xpath('.//*/span[@class="event_time"]/text()')[0].strip()]
event['date'] = " ".join(event['date'])
event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
event['dateStamp'] = datetime.strptime(event['date'] + tz_str, DATETIME_FORMAT)
event['link'] = c.xpath('.//*/span[@class="event_title"]/a/@href')[0]
digitools.createBasicEvent(event, "Co", venue)
scraper.items+=1
except Exception as e:
print(e)
ppr(event)
print("\n\n+++\n\n")
pass
if len(sys.argv) >= 2:
arg1 = sys.argv[1]

View File

@@ -13,7 +13,6 @@ import pytz
from events.models import Organization, Scraper
import events.digitools as digitools
venue, created = Organization.objects.get_or_create(
name="Green Room",
city="Minneapolis",
@@ -25,10 +24,8 @@ scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'm
event_type = "Mu"
# Time Signatures
tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%a %b %d %Y %I:%M %p'
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'
tz_str = " -0600 UTC"
DATETIME_FORMAT = '%a %b %d %Y %I:%M %p %z %Z'
def get_events(ps, event_type):
contents = ps.xpath('.//*/div[@class="vp-event-card vp-venue-greenroom vp-col"]')
@@ -45,14 +42,13 @@ def get_events(ps, event_type):
event['calendars'] = [scraper.calendar]
event['title'] = c.xpath('.//*/div[@class="vp-event-name"]/text()')[0]
event['datetime'] = date + " " + str(year) + " " + time
event['dateStamp'] = datetime.strptime(event['datetime'], DATETIME_FORMAT)
event['dateStamp'] = datetime.strptime(event['datetime'] + tz_str, DATETIME_FORMAT)
event['link'] = venue.website + c.xpath('.//a[@class="vp-event-link"]/@href')[0]
digitools.createBasicEvent(event, event_type, venue)
scraper.items+=1
except Exception as e:
print(e)
ppr(event)
print("\n+++\n")
if len(sys.argv) >= 2:
arg1 = sys.argv[1]

View File

@@ -26,8 +26,8 @@ venue, created = Organization.objects.get_or_create(
scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'msp')
ppr(scraper)
tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%a, %b %d, %Y @ %I:%M %p'
tz_str = " -0600 UTC"
DATETIME_FORMAT = '%a, %b %d, %Y @ %I:%M %p %z %Z'
# Set initial variables for City, etc
calendar_url = [
@@ -60,7 +60,7 @@ def get_listings(pse, events):
except:
e['price'] = "See Link"
e['image'] = event.xpath('.//*/img/@data-src')[0]
e["date_time"] = datetime.strptime(e['datetime'], DATETIME_FORMAT)
e["date_time"] = datetime.strptime(e['datetime'] + tz_str, DATETIME_FORMAT)
e['scraper'] = scraper
e['calendar'] = [scraper.calendar]
events.append(e)

View File

@@ -22,8 +22,8 @@ venue, created = Organization.objects.get_or_create(
scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'msp')
DATETIME_FORMAT = '%A, %B %d , %Y %I:%M %p'
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'
tz_str = " -0600 UTC"
DATETIME_FORMAT = '%A, %B %d , %Y %I:%M %p %z %Z'
def get_events(ps, event_type):
contents = ps.xpath('.//*/div[@class="day has-event"]')
@@ -38,7 +38,7 @@ def get_events(ps, event_type):
event['title'] = c.xpath('.//*/h3/text()')[0]
event['date'] = [month, day, year, c.xpath('.//*/p[@class="time"]/text()')[0]]
event['date'] = " ".join(event['date'])
event['dateStamp'] =datetime.strptime(event['date'], DATETIME_FORMAT)
event['dateStamp'] =datetime.strptime(event['date'] + tz_str, DATETIME_FORMAT)
event['link'] = "https://www.magersandquinn.com" + c.xpath('.//a[@class="event in-store"]/@href')[0]
digitools.createBasicEvent(event, "Ed", venue)
scraper.items+=1
@@ -63,7 +63,7 @@ for link in links:
ps = digitools.getSource(br, link)
get_events(ps, "Ed")
sleep(3)
# ppr(events)
br.close()
digitools.updateScraper(scraper, item_count_start)

View File

@@ -26,8 +26,8 @@ except Exception as e:
scraper,item_count_start, virtcal = digitools.getScraper(venue, venue.website, 'msp')
tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%b %d, %Y %I:%M %p'
tz_str = " -0600 UTC"
DATETIME_FORMAT = '%b %d, %Y %I:%M %p %z %Z'
def get_events(ps, event_type):
contents = ps.xpath('.//*/div[@class="summary-content sqs-gallery-meta-container"]')
@@ -40,7 +40,7 @@ def get_events(ps, event_type):
event['title'] = c.xpath('.//*/a[@class="summary-title-link"]/text()')[0]
event['link'] = "https://theparkwaytheater.com" + c.xpath('.//*/a[@class="summary-title-link"]/@href')[0]
event['date'] = c.xpath('.//div/div/time/text()')[0] + " " + c.xpath('.//*/span[@class="event-time-12hr"]/text()')[0].split("")[0].strip()
event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
event['dateStamp'] = datetime.strptime(event['date'] + tz_str, DATETIME_FORMAT)
event['desc'] = c.xpath('.//*/p/text()')[0]
event['img_link'] = i
event['details'] = {
@@ -54,8 +54,8 @@ def get_events(ps, event_type):
event_type = event_type,
show_title = event['title'],
show_link = event['link'],
show_date = datetime.strptime(event['date'], DATETIME_FORMAT),
show_day = datetime.strptime(event['date'], DATETIME_FORMAT),
show_date = datetime.strptime(event['date'] + tz_str, DATETIME_FORMAT),
show_day = datetime.strptime(event['date'] + tz_str, DATETIME_FORMAT),
more_details = event["details"],
venue = venue
)
@@ -64,14 +64,14 @@ def get_events(ps, event_type):
except Exception as e:
try:
event['date'] = c.xpath('.//div/div/time/text()')[0].split("")[0] + " " + c.xpath('.//*/span[@class="event-time-12hr"]/text()')[0].split("")[0].strip()
event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
event['dateStamp'] = datetime.strptime(event['date'] + tz_str, DATETIME_FORMAT)
new_event, created = DSEvent.objects.update_or_create(
scraper = scraper,
event_type = event_type,
show_title = event['title'],
show_link = event['link'],
show_date = datetime.strptime(event['date'], DATETIME_FORMAT),
show_day = datetime.strptime(event['date'], DATETIME_FORMAT),
show_date = datetime.strptime(event['date'] + tz_str, DATETIME_FORMAT),
show_day = datetime.strptime(event['date'] + tz_str, DATETIME_FORMAT),
more_details = event["details"],
venue = venue
)
@@ -97,8 +97,6 @@ get_events(ps, "Mu")
calendar_url = "https://theparkwaytheater.com/movies"
ps = digitools.getSource(br, calendar_url)
get_events(ps, "Th")
# ppr(events)
br.close()
digitools.updateScraper(scraper, item_count_start)

View File

@@ -13,7 +13,6 @@ import pytz
from events.models import Organization, Scraper, Event
import events.digitools as digitools
from selenium.webdriver.common.by import By
from lxml import html
@@ -26,13 +25,9 @@ venue, created = Organization.objects.get_or_create(
scraper,item_count_start, virtcal = digitools.getScraper(venue, venue.website, 'msp')
tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%a %B %d @ %I:%M %p %Y'
DATETIME_FORMAT_2 = '%a %B %d, %Y @ %I:%M %p'
# DATETIME_FORMAT_2 = '%b %d %I:%M%p %Y'
# DATETIME_FORMAT_3 = '%b %d %Y'
# Set initial variables for City, etc
tz_str = " -0600 UTC"
DATETIME_FORMAT = '%a %B %d @ %I:%M %p %Y %z %Z'
DATETIME_FORMAT_2 = '%a %B %d, %Y @ %I:%M %p %z %Z'
calendar_url = 'https://noboolpresents.com/venues/uptown-vfw/'
current_year = str(datetime.now().year)
@@ -54,16 +49,14 @@ def getEvents(br):
dateTime = event.xpath('.//*/span[@class="tribe-event-date-start"]/text()')[0].replace("", "").strip() + " " + current_year
title = event.xpath('.//*/h2[@class="alt-font"]/a/text()')[0].replace("\n", "").replace("\t", "")
link = event.xpath('.//*/h2[@class="alt-font"]/a/@href')[0]
# deets["tickets"] = event.xpath('.//*/span[@class="tribe-events-c-small-cta__price"]/strong/text()')[0]
try:
new_event, created = Event.objects.update_or_create(
scraper = scraper,
event_type = 'Mu',
show_title = title,
show_link = link,
show_date = datetime.strptime(dateTime, DATETIME_FORMAT),
show_day = datetime.strptime(dateTime, DATETIME_FORMAT),
# more_details = deets["tickets"],
show_date = datetime.strptime(dateTime + tz_str, DATETIME_FORMAT),
show_day = datetime.strptime(dateTime + tz_str, DATETIME_FORMAT),
venue = venue
)
digitools.add_calendar(new_event, 'msp')
@@ -75,9 +68,8 @@ def getEvents(br):
event_type = 'Mu',
show_title = title,
show_link = link,
show_date = datetime.strptime(dateTime[:-4].strip(), DATETIME_FORMAT_2),
show_day = datetime.strptime(dateTime[:-4].strip(), DATETIME_FORMAT_2),
# more_details = deets["tickets"],
show_date = datetime.strptime(dateTime[:-4].strip() + tz_str, DATETIME_FORMAT_2),
show_day = datetime.strptime(dateTime[:-4].strip() + tz_str, DATETIME_FORMAT_2),
venue = venue
)
digitools.add_calendar(new_event, 'msp')

View File

@@ -13,9 +13,8 @@ import pytz
from events.models import Organization, Scraper
import events.digitools as digitools
tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%b %d %I:%M %p %Y'
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'
tz_str = " -0600 UTC"
DATETIME_FORMAT = '%b %d %I:%M %p %Y %z %Z'
venue, created = Organization.objects.get_or_create(
name="Cabooze",
@@ -29,7 +28,6 @@ scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'm
def get_events(ps, event_type):
print("Getting events ...")
contents = ps.xpath('.//*/div[@class="vp-event-row vp-widget-reset vp-venue-thecabooze"]')
ppr(contents)
for c in contents:
try:
event = {}
@@ -45,16 +43,14 @@ def get_events(ps, event_type):
event['title'] = c.xpath('.//*/div[@class="vp-event-name"]/text()')[0]
event['date'] = [date, time, str(year)]
event['date'] = " ".join(event['date'])
event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
event['dateStamp'] = datetime.strptime(event['date'] + tz_str, DATETIME_FORMAT)
event['link'] = "https://www.cabooze.com/" + c.xpath('.//a[@class="vp-event-link"]/@href')[0]
# print("Event Dict Created")
# ppr(event)
digitools.createBasicEvent(event, event_type, venue)
scraper.items+=1
except Exception as e:
print(e)
ppr(event)
print("\n\n+++\n\n")
if len(sys.argv) >= 2:
arg1 = sys.argv[1]

View File

@@ -22,17 +22,13 @@ venue, created = Organization.objects.get_or_create(
scraper,item_count_start,virtcal = digitools.getScraper(venue,venue.website, 'msp')
DATETIME_FORMAT = '%A, %B %d, %Y %I:%M %p'
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'
DATETIME_FORMAT_3 = '%A, %B %d at %I:%M%p %Y'
DATETIME_FORMAT_4 = '%A, %B %d at %I%p %Y'
DATETIME_FORMAT_5 = '%A, %B %d @%I%p %Y'
tz_str = " -0600 UTC"
DATETIME_FORMAT = '%A, %B %d, %Y %I:%M %p %z %Z'
def get_events(ps):
links = ps.xpath('.//*/div[@class="summary-title"]/a/@href')
print("Length of Links: ", len(links))
links = list(set(links))
print("New Length of Links: ", len(links))
for l in links:
if "cedar-news-blog" in l:
continue
@@ -54,7 +50,7 @@ def get_events(ps):
print(e)
print("failed event: ", event)
dateStamp = date + " " + time
event['dateStamp'] = datetime.strptime(dateStamp, DATETIME_FORMAT)
event['dateStamp'] = datetime.strptime(dateStamp + tz_str, DATETIME_FORMAT)
digitools.createBasicEvent(event, "Mu", venue)
scraper.items+=1

View File

@@ -35,9 +35,10 @@ venue, created = Organization.objects.get_or_create(
scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'msp')
DATETIME_FORMAT = '%b %d %I%p %Y'
DATETIME_FORMAT_2 = '%b %d %I:%M%p %Y'
DATETIME_FORMAT_3 = '%b %d %Y'
tz_str = " -0600 UTC"
DATETIME_FORMAT = '%b %d %I%p %Y %z %Z'
DATETIME_FORMAT_2 = '%b %d %I:%M%p %Y %z %Z'
DATETIME_FORMAT_3 = '%b %d %Y %z %Z'
# Set initial variables for City, etc
calendar_url = 'https://331club.com/#calendar'
@@ -72,13 +73,13 @@ for d in dates:
event = {}
event["datetime"] = event_date + time + [current_year]
try:
event["date_time"] = datetime.strptime(" ".join(event["datetime"]), DATETIME_FORMAT)
event["date_time"] = datetime.strptime(" ".join(event["datetime"]) + tz_str, DATETIME_FORMAT)
except:
try:
event["date_time"] = datetime.strptime(" ".join(event["datetime"]), DATETIME_FORMAT_2)
event["date_time"] = datetime.strptime(" ".join(event["datetime"]) + tz_str, DATETIME_FORMAT_2)
except:
try:
event["date_time"] = datetime.strptime(" ".join(event["datetime"]), DATETIME_FORMAT_3)
event["date_time"] = datetime.strptime(" ".join(event["datetime"]) + tz_str, DATETIME_FORMAT_3)
except:
event["date_time"] = "Invalid"
event["bands"] = (", ").join(bands)

View File

@@ -34,18 +34,17 @@ def get_info(pse):
except Exception as e:
print("details issue: ", e)
try:
event["date_time"] = datetime.strptime(" ".join(event["date"]) + " " + event["details"]["Doors Open"], DATETIME_FORMAT)
event["date_time"] = datetime.strptime(" ".join(event["date"]) + " " + event["details"]["Doors Open"] + tz_str, DATETIME_FORMAT)
except Exception as e:
print("Using alt date format 2: ", e)
try:
event["date_time"] = datetime.strptime(" ".join(event["date"]) + " " + event["details"]["Doors Open"], DATETIME_FORMAT_2)
event["date_time"] = datetime.strptime(" ".join(event["date"]) + " " + event["details"]["Doors Open"] + tz_str, DATETIME_FORMAT_2)
ppr(event)
except Exception as e:
print("Using alt date format 3: ", e)
print(event['date'])
event["date_time"] = datetime.strptime(" ".join(event["date"]), DATETIME_FORMAT_3)
event["date_time"] = datetime.strptime(" ".join(event["date"]) + tz_str, DATETIME_FORMAT_3)
print("The Event:")
ppr(event)
return event
def get_date(pse, event):
@@ -78,10 +77,10 @@ venue, created = Organization.objects.get_or_create(
scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'msp')
tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%b %d %Y %I%p'
DATETIME_FORMAT_2 = '%b %d %Y %I:%M%p'
DATETIME_FORMAT_3 = '%b %d %Y'
tz_str = " -0600 UTC"
DATETIME_FORMAT = '%b %d %Y %I%p %z %Z'
DATETIME_FORMAT_2 = '%b %d %Y %I:%M%p %z %Z'
DATETIME_FORMAT_3 = '%b %d %Y %z %Z'
# Set initial variables for City, etc
month = int(datetime.now().month)
@@ -112,8 +111,6 @@ else:
calendar_url_2 = 'https://first-avenue.com/shows/?start_date=' + str(year) + next_month_string
# print("\n\n", calendar_url, calendar_url_2, "\n\n")
if len(sys.argv) >= 2:
arg1 = sys.argv[1]
br = digitools.getBrowser(arg1)
@@ -128,6 +125,8 @@ if datetime.now().day < 8:
elif 7 < datetime.now().day < 15:
ps = digitools.getSource(br, calendar_url)
shows = ps.xpath('.//*/div[@class="show_name content flex-fill"]/div/div/h4/a/@href')
ps = digitools.getSource(br, calendar_url_2)
shows = shows + ps.xpath('.//*/div[@class="show_name content flex-fill"]/div/div/h4/a/@href')[:15]
elif 14 < datetime.now().day < 21:
ps = digitools.getSource(br, calendar_url)
shows = ps.xpath('.//*/div[@class="show_name content flex-fill"]/div/div/h4/a/@href')[:95]
@@ -176,4 +175,3 @@ br.close()
digitools.updateScraper(scraper, item_count_start)
# br.find_element_by_class_name('fc-btn_allCalendars-button').click()

View File

@@ -0,0 +1,79 @@
# Guthrie Theater (Minneapolis) performance scraper.
import os, sys
from datetime import datetime
from dateutil import relativedelta
# Make the project bootstrap importable, then configure Django before any
# events.* imports.
sys.path.append('/var/www/digisnaxx.ado/scrapers')
import dtss
dtss.getReady()
from time import sleep
from pprint import pprint as ppr
import pytz
from events.models import Organization, Scraper
import events.digitools as digitools
current_year = str(datetime.now().year)
# Ensure the venue row exists, then fetch this run's Scraper record plus
# the starting item count used by updateScraper() at the end of the run.
venue, created = Organization.objects.get_or_create(
    name="Guthrie Theater",
    city="Minneapolis",
    website="https://www.guthrietheater.org",
    is_venue = True
)
scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'msp')
# NOTE(review): event_type says "Mu" but get_events creates 'Th' events —
# the value below is effectively unused by event creation.
event_type = "Mu"
# Fixed UTC offset appended to scraped date strings; parsed by the
# %z %Z directives below (-0600 = US Central standard offset).
tz_str = "-0600 UTC"
DATETIME_FORMAT = '%b %d %Y %I:%M %p %z %Z'
def get_events(ps, event_type):
    """Collect show callouts from the Guthrie shows page, then visit each
    show's page and store one "Th" (theater) event per performance.

    Args:
        ps: lxml HTML tree of the shows-and-tickets landing page.
        event_type: unused; performances are always created as "Th".

    Uses the module-level ``br`` browser to fetch each show page, plus the
    ``scraper``, ``venue``, ``tz_str`` and ``DATETIME_FORMAT`` globals.
    """
    contents = ps.xpath('.//*/div[@class="image-callouts-item-inner"]')
    events = []
    year = datetime.today().year
    for c in contents:
        try:
            event = {}
            event['title'] = c.xpath('.//figcaption/p/text()')[0]
            event['link'] = venue.website + c.xpath('.//figcaption/a/@href')[0]
            event['scraper'] = scraper
            # NOTE(review): other scrapers in this repo pass a list
            # ([scraper.calendar]); confirm createBasicEvent accepts a bare
            # calendar here.
            event['calendars'] = scraper.calendar
            events.append(event)
        except Exception as exc:
            print(exc)
    # The trailing three callouts are skipped — presumably non-show tiles;
    # TODO confirm against the live page.
    for show in events[:-3]:
        try:
            # Use a fresh name for the detail page instead of clobbering
            # the `ps` parameter as the old code did.
            page = digitools.getSource(br, show['link'])
            for n in page.xpath('.//div[@class="modal-row-inner buytix"]'):
                date = n.xpath('.//div[@class="buytix-info"]/h4/text()')[0].split(',')[1].strip()
                time = n.xpath('.//div[@class="buytix-time"]/p/text()')[0].strip()
                show['datetime'] = "{0} {1} {2} {3}".format(date, year, time, tz_str)
                show['dateStamp'] = datetime.strptime(show['datetime'], DATETIME_FORMAT)
                digitools.createBasicEvent(show, 'Th', venue)
                scraper.items += 1
        except Exception as exc:
            # The old handler (`except Exception as e`) shadowed the loop
            # variable `e`, losing the failing event; report both now.
            print("Error: ", show, exc)
# --- entry point ------------------------------------------------------
# argv[1] selects the browser run environment passed to getBrowser().
if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    quit()
# Landing page listing all current shows.
shows = "/shows-and-tickets/"
ps = digitools.getSource(br, venue.website+shows)
get_events(ps, event_type)
sleep(3)
br.close()
# Record this run's item delta on the Scraper model.
digitools.updateScraper(scraper, item_count_start)

View File

@@ -14,9 +14,8 @@ from events.models import Organization, Scraper
import events.digitools as digitools
count = 0
tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%a, %b %d %Y %I%p'
DATETIME_FORMAT_2 = '%a, %b %d %Y %I:%M %p SHOW'
tz_str = " -0600 UTC"
DATETIME_FORMAT = '%a, %b %d %Y %I%p %z %Z'
venue, created = Organization.objects.get_or_create(
name="Icehouse",
@@ -38,19 +37,16 @@ def get_events(ps, event_type):
event['link'] = venue.website + c.xpath('.//*/a[@class="show-link"]/@href')[0]
event['date'] = c.xpath('.//*/h4[@class="day-of-week"]/text()')[0]
month = event['date'].split(' ')[1].strip()
print("MONTH: ", month)
year = int(datetime.today().year)
if month in ['Jan', 'Feb', 'Mar']:
year = year + 1
time = c.xpath('.//p/span/text()')[0][:4].strip()
if time[-1:] == 'm':
event['dateStamp'] = event['date'] + ' ' + str(year) + ' ' + time
event['dateStamp'] =datetime.strptime(event['dateStamp'], DATETIME_FORMAT)
event['dateStamp'] =datetime.strptime(event['dateStamp'] + tz_str, DATETIME_FORMAT)
digitools.createBasicEvent(event, event_type, venue)
else:
ppr(event)
print("MONTH: ", month)
print("Failed ?")
except Exception as e:
print("What?",e,"\n\n+++")
@@ -65,6 +61,5 @@ ps = digitools.getSource(br, venue.website)
br.execute_script("window.scrollTo(0, window.scrollY + 5000)")
get_events(ps, "Mu")
# ppr(events)
br.close()
digitools.updateScraper(scraper, item_count_start)

View File

@@ -13,7 +13,6 @@ import pytz
from events.models import Organization, Scraper
import events.digitools as digitools
current_year = str(datetime.now().year)
venue, created = Organization.objects.get_or_create(
@@ -27,39 +26,34 @@ scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'm
event_type = "Mu"
# Time Signatures
tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%b. %d %Y %I:%M%p'
DATETIME_FORMAT_night = '%b. %d %Y %I:%M %p'
DATETIME_FORMAT_2 = '%b. %d %Y %I:%Mam'
tz_str = " -0600 UTC"
DATETIME_FORMAT = '%b. %d %Y %I:%M%p %z %Z'
def get_events(ps, event_type):
contents = ps.xpath('.//*/div[@class="sse-row sse-clearfix"]')
for c in contents:
year = datetime.today().year
try:
date = c.xpath('.//h1[@class="sse-size-64"]/text()')[0]
month = date.split(".")[0]
ppr(date)
if month in ['JAN', 'FEB', 'MAR']:
year = int(datetime.today().year) + 1
# ppr(date)
# if month in ['JAN', 'FEB', 'MAR']:
# year = int(datetime.today().year) + 1
event = {}
event['scraper'] = scraper
event['calendars'] = scraper.calendar
event['link'] = venue.website
event['title'] = c.xpath('.//p/span/b/text()')[0]
event['deets'] = c.xpath('.//p/span/text()')[0]
event['title'] = event['title'] + ' ' + event['deets']
paras = c.xpath('.//p/text()')
times = paras[1].split(" ")[1]
event['datetime'] = "{0} {1} {2}".format(date, year, times)
event['dateStamp'] =datetime.strptime(event['datetime'], DATETIME_FORMAT)
ppr(event)
event['dateStamp'] =datetime.strptime(event['datetime'] + tz_str, DATETIME_FORMAT)
print("Print Events: ")
digitools.createBasicEvent(event, event_type, venue)
except Exception as e:
# print(e)
print("Error: ", e)
pass
if len(sys.argv) >= 2:

View File

@@ -21,9 +21,8 @@ venue, created = Organization.objects.get_or_create(
)
scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'msp')
DATETIME_FORMAT = '%B %d %Y %I:%M%p'
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'
tz_str = " -0600 UTC"
DATETIME_FORMAT = '%B %d %Y %I:%M%p %z %Z'
def get_events(ps):
contents = ps.xpath('.//*/ul[@class="events-list"]/li')
@@ -38,12 +37,11 @@ def get_events(ps):
event['title'] = c.xpath('.//div/h4/a/text()')[0]
event['date'] = [month, day, str(year), c.xpath('.//div[@class="event-info"]/p/text()')[0].split(" ")[0]]
event['date'] = " ".join(event['date'])
event['dateStamp'] =datetime.strptime(event['date'], DATETIME_FORMAT)
event['dateStamp'] =datetime.strptime(event['date'] +tz_str, DATETIME_FORMAT)
event['link'] = c.xpath('.//div[@class="event-info"]/h4/a/@href')[0]
if " presents" in event['title']:
event['title'] = event['title'].split("presents")[1][1:].strip()
if event['title'].startswith('.'):
print("BLAHH\n")
event['title'] = event['title'][1:].strip()
digitools.createBasicEvent(event, "Mu", venue)
scraper.items+=1

View File

@@ -22,12 +22,8 @@ venue, created = Organization.objects.get_or_create(
scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'msp')
event_type = ""
# Time Signatures
tz = pytz.timezone("US/Central")
DATETIME_FORMAT_2 = '%b %d %Y %I:%M %p'
DATETIME_FORMAT = '%B %d @ %I:%M %p %Y'
tz_str = " -0600 UTC"
DATETIME_FORMAT = '%B %d @ %I:%M %p %Y %z %Z'
def get_events(ps, event_type):
contents = ps.xpath('.//*/article')
@@ -41,15 +37,13 @@ def get_events(ps, event_type):
year = int(year) + 1
event['scraper'] = scraper
event['calendars'] = [scraper.calendar]
event['title'] = c.xpath('.//*/h3/a/text()')[0].replace("\n", "").replace("\t", "")
event['title'] = c.xpath('.//*/h4/a/text()')[0].replace("\n", "").replace("\t", "")
event['date'] = " ".join([ dateTime, str(year)])
event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
event['link'] = c.xpath('.//*/h3/a/@href')[0]
event['dateStamp'] = datetime.strptime(event['date'] + tz_str, DATETIME_FORMAT)
event['link'] = c.xpath('.//*/h4/a/@href')[0]
digitools.createBasicEvent(event, event_type, venue)
except Exception as e:
print(e)
ppr(event)
print("\n\n+++\n\n")
if len(sys.argv) >= 2:
arg1 = sys.argv[1]

View File

@@ -11,7 +11,6 @@ from pprint import pprint as ppr
import pytz
from events.models import Organization, Scraper
import events.digitools as digitools
venue, created = Organization.objects.get_or_create(
@@ -23,7 +22,6 @@ venue, created = Organization.objects.get_or_create(
scraper,item_count_start, virtcal = digitools.getScraper(venue, venue.website, 'msp')
# Time Signatures
tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%A, %B %d, %Y %I:%M %p'

View File

@@ -0,0 +1,75 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta
# Make the shared scraper bootstrap importable.
sys.path.append('/var/www/digisnaxx.ado/scrapers')
import dtss
# Prepares the runtime (Django settings/setup) so the events.models imports
# below resolve -- must run before them.
dtss.getReady()
from time import sleep
from pprint import pprint as ppr
import pytz
from events.models import Organization, Scraper, Calendar, Event
import events.digitools as digitools

# Fixed Pacific offset appended to every scraped date string so strptime
# produces an aware datetime.
# NOTE(review): "-0800" is PST only; during PDT the true offset is -0700 -- confirm intended.
tz_str = "-0800 UTC"
DATETIME_FORMAT = '%A, %B %d, %Y %I:%M %p %z %Z'    # e.g. "Friday, March 01, 2024 8:00 PM -0800 UTC"
DATETIME_FORMAT_2 = '%A, %B %d, %Y %I%p %z %Z'      # fallback: listing omits the minutes ("8PM")

# Venue row all scraped events attach to (created on first run).
venue, created = Organization.objects.get_or_create(
    name="Mississippi Studios",
    city="Portland",
    website="https://mississippistudios.com/full-view/",
    is_venue = True
)
scraper, item_count_start, virtcal = digitools.getScraper(venue, venue.website, 'pdx')
# Reset the per-run item counter before scraping.
scraper.items = 0
scraper.save()
def get_events(contents, event_type):
    """Scrape event cards from a Mississippi Studios listing page.

    ``contents`` is the parsed page source (lxml tree); ``event_type`` is
    accepted for signature parity with the other scrapers but events are
    always created as "Mu". Each successfully parsed card is handed to
    ``digitools.createBasicEvent`` and counted on ``scraper.items``.

    Fixes vs. the original: the ``contents`` parameter was ignored in favor
    of the module-level ``ps`` (callers pass ``ps`` positionally, so using
    the parameter is equivalent and removes the hidden global), and a
    duplicated ``event['date']`` assignment was dropped.
    """
    cards = contents.xpath('.//*/div[@class="event"]/div/div[@class="event__inner"]')
    i = 0
    for c in cards:
        i += 1
        try:
            event = {}
            date = c.xpath('.//*/h5/text()')[0].replace("\n", "").replace("\t", "")
            # Door times live in a parallel document-order list, so the i-th
            # entry is matched to the i-th card.
            # NOTE(review): fragile -- assumes exactly one time node per card.
            time = c.xpath('//div/div/div[4]/p[1]/span/text()')[i].split("/")[0].replace("Doors: ", "").strip()
            event['date'] = (' ').join([date, time, tz_str])
            event['scraper'] = scraper
            event['calendars'] = [scraper.calendar]
            event['title'] = c.xpath('.//*/h2/a/text()')[0]
            try:
                event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
            except ValueError:
                # Some listings omit the minutes; second format covers "8PM".
                event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT_2)
            event['link'] = c.xpath('.//*/h2/a/@href')[0]
            digitools.createBasicEvent(event, "Mu", venue)
            scraper.items += 1
        except Exception as e:
            # Best-effort per card: log and move on to the next listing.
            print("\nError: ", e)
# Entry point: requires a run-environment argument to pick the browser.
if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    # BUG FIX: the original called br.close() here, but `br` is never created
    # on this path (NameError). Just report and exit, as the other scrapers do.
    print("No run_env")
    quit()

# Scrape page one, then page two (the calendar paginates at <website>2).
ps = digitools.getSource(br, venue.website)
get_events(ps, "Mu")
sleep(3)
link = venue.website + "2"
print(link)
ps = digitools.getSource(br, link)
get_events(ps, "Mu")
sleep(3)
digitools.updateScraper(scraper, item_count_start)
br.close()

View File

@@ -0,0 +1,75 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta
# Make the shared scraper bootstrap importable.
sys.path.append('/var/www/digisnaxx.ado/scrapers')
import dtss
# Prepares the runtime (Django settings/setup) so the events.models imports
# below resolve -- must run before them.
dtss.getReady()
from time import sleep
from pprint import pprint as ppr
import pytz
from events.models import Organization, Scraper, Calendar, Event
import events.digitools as digitools

# Fixed Pacific offset appended to every scraped date string.
# NOTE(review): "-0800" is PST only; confirm DST handling is acceptable.
tz_str = "-0800 UTC"
DATETIME_FORMAT = '%a, %B %d, %Y %I:%M %p %z %Z'    # e.g. "Fri, March 1, 2024 8:00 PM -0800 UTC"
DATETIME_FORMAT_2 = '%a, %B %d, %Y %I%p %z %Z'      # fallback: time given without minutes

# Venue row all scraped events attach to (created on first run).
venue, created = Organization.objects.get_or_create(
    name="Revolution Hall",
    city="Portland",
    website="https://www.revolutionhall.com/",
    is_venue = True
)
scraper, item_count_start, virtcal = digitools.getScraper(venue, venue.website, 'pdx')
# Reset the per-run item counter before scraping.
scraper.items = 0
scraper.save()
def get_events(contents, event_type, bar):
    """Create a "Mu" event for each Revolution Hall card in *contents*.

    ``bar`` distinguishes the two rooms; "Show Bar" events get a suffixed
    title. Each parsed card is passed to ``digitools.createBasicEvent`` and
    counted on ``scraper.items``; per-card failures are logged and skipped.
    """
    for card in contents:
        try:
            raw_date = card.xpath('.//*/span[@class="event-date--full"]/text()')[0]
            # Strip ordinal suffixes ("1st", "2nd", ...) so strptime can parse.
            for suffix in ("th", "st", "rd", "nd"):
                raw_date = raw_date.replace(suffix, "")
            raw_date = raw_date.strip()

            when = card.xpath('.//*/span[@class="event-doors-showtime"]/text()')[0]
            when = when.split("/")[0].replace("Doors: ", "").strip()
            # Some cards list a show time instead of doors.
            if "Show" in when:
                when = when.replace("Show: ", "").strip()

            event = {
                'scraper': scraper,
                'calendars': [scraper.calendar],
                'date': " ".join([raw_date, when, tz_str]),
            }
            title = card.xpath('.//*/div[@class="event__content"]/h3/a/text()')[0]
            event['title'] = title + " (Show Bar)" if bar == "Show Bar" else title
            try:
                stamp = datetime.strptime(event['date'], DATETIME_FORMAT)
            except:
                stamp = datetime.strptime(event['date'], DATETIME_FORMAT_2)
            event['dateStamp'] = stamp
            event['link'] = card.xpath('.//*/div[@class="event__content"]/h3/a/@href')[0]
            digitools.createBasicEvent(event, "Mu", venue)
            scraper.items += 1
            print("\nSuccess\n")
        except Exception as e:
            print("\nError: ", e)
# Entry point: requires a run-environment argument to pick the browser.
if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    # BUG FIX: the original called br.close() here, but `br` is never created
    # on this path (NameError). Just report and exit, as the other scrapers do.
    print("No run_env")
    quit()

ps = digitools.getSource(br, venue.website)
# The page lists both rooms; scrape each wrapper separately so Show Bar
# events get their title suffix.
contents = ps.xpath('.//*/div[@class="event-wrapper revolution-hall"]')
get_events(contents, "Mu", "Rev Hall")
contents = ps.xpath('.//*/div[@class="event-wrapper show-bar"]')
get_events(contents, "Mu", "Show Bar")
sleep(3)
digitools.updateScraper(scraper, item_count_start)
br.close()

View File

@@ -0,0 +1,68 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta
# Make the shared scraper bootstrap importable.
sys.path.append('/var/www/digisnaxx.ado/scrapers')
import dtss
# Prepares the runtime (Django settings/setup) so the events.models imports
# below resolve -- must run before them.
dtss.getReady()
from time import sleep
from pprint import pprint as ppr
import pytz
from events.models import Organization, Scraper, Calendar, Event
import events.digitools as digitools

# Fixed Pacific offset appended to every scraped date string.
# NOTE(review): "-0800" is PST only; confirm DST handling is acceptable.
tz_str = "-0800 UTC"
DATETIME_FORMAT = '%a, %b %d %Y %I:%M%p %z %Z'      # e.g. "Fri, Mar 1 2024 8:00PM -0800 UTC"
DATETIME_FORMAT_2 = '%a, %b %d %Y %I %p %z %Z'      # fallback: "8 PM" style time

# Venue row all scraped events attach to (created on first run).
venue, created = Organization.objects.get_or_create(
    name="Roseland Theater",
    city="Portland",
    website="https://roselandpdx.com/",
    is_venue = True
)
scraper, item_count_start, virtcal = digitools.getScraper(venue, venue.website, 'pdx')
# Reset the per-run item counter before scraping.
scraper.items = 0
scraper.save()
def get_events(ps, event_type):
    """Create a "Mu" event for each Roseland listing on the parsed page *ps*.

    Parsed cards are handed to ``digitools.createBasicEvent`` and counted on
    ``scraper.items``; per-card failures are logged and skipped.
    """
    def _flat(text):
        # Collapse the renderer's embedded newlines/tabs.
        return text.replace("\n", "").replace("\t", "")

    cards = ps.xpath('.//*/div[@class="col-12 eventWrapper rhpSingleEvent py-4 px-0 rhp-event__single-event--list"]')
    for card in cards:
        try:
            day = _flat(card.xpath('.//*/div[@id="eventDate"]/text()')[0])
            doors = card.xpath('.//*/span[@class="font0by75 fontWeight500 lineHeight15 rhp-event__time-text--list"]/text()')[0]
            # Keep only the leading "Doors: H:MM PM" words, drop the label.
            doors = _flat(" ".join(doors.split(" ")[:3]).replace("Doors: ", ""))
            event = {
                'scraper': scraper,
                'calendars': [scraper.calendar],
                # Listing omits the year; assume the current one.
                'date': " ".join([day, str(datetime.now().year), doors, tz_str]),
                'title': _flat(card.xpath('.//*/a[@id="eventTitle"]/h2/text()')[0]),
                'link': card.xpath('.//*/a[@id="eventTitle"]/@href')[0],
            }
            try:
                event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
            except:
                event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT_2)
            digitools.createBasicEvent(event, "Mu", venue)
            scraper.items += 1
        except Exception as e:
            print("\nError: ", e)
# Entry point: requires a run-environment argument to pick the browser.
if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    # BUG FIX: the original called br.close() here, but `br` is never created
    # on this path (NameError). Just report and exit, as the other scrapers do.
    print("No run_env")
    quit()

ps = digitools.getSource(br, venue.website)
get_events(ps, "Mu")
sleep(3)
digitools.updateScraper(scraper, item_count_start)
br.close()

View File

@@ -0,0 +1,72 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta
# Make the shared scraper bootstrap importable.
sys.path.append('/var/www/digisnaxx.ado/scrapers')
import dtss
# Prepares the runtime (Django settings/setup) so the events.models imports
# below resolve -- must run before them.
dtss.getReady()
from time import sleep
from pprint import pprint as ppr
import pytz
from events.models import Organization, Scraper, Calendar, Event
import events.digitools as digitools

# Fixed Pacific offset appended to every scraped date string.
tz_str = "-0800 UTC"
DATETIME_FORMAT = '%a, %b %d %Y %I:%M%p %z %Z'      # e.g. "Fri, Mar 1 2024 8:00PM -0800 UTC"
DATETIME_FORMAT_2 = '%a, %b %d %Y %I %p %z %Z'      # fallback: "8 PM" style time

# NOTE(review): this venue/name/website is identical to the other Roseland
# scraper in this commit -- confirm this file was meant to target a different
# venue, otherwise the two scripts duplicate each other's events.
venue, created = Organization.objects.get_or_create(
    name="Roseland Theater",
    city="Portland",
    website="https://roselandpdx.com/",
    is_venue = True
)
scraper, item_count_start, virtcal = digitools.getScraper(venue, venue.website, 'pdx')
# Reset the per-run item counter before scraping.
scraper.items = 0
scraper.save()
def get_events(ps, event_type):
    """Create a "Mu" event for each listing on the parsed page *ps*.

    Parsed cards are handed to ``digitools.createBasicEvent`` and counted on
    ``scraper.items``; per-card failures are logged and skipped.
    """
    def _flat(text):
        # Collapse the renderer's embedded newlines/tabs.
        return text.replace("\n", "").replace("\t", "")

    cards = ps.xpath('.//*/div[@class="col-12 eventWrapper rhpSingleEvent py-4 px-0 rhp-event__single-event--list"]')
    for card in cards:
        try:
            day = _flat(card.xpath('.//*/div[@id="eventDate"]/text()')[0])
            doors = card.xpath('.//*/span[@class="font0by75 fontWeight500 lineHeight15 rhp-event__time-text--list"]/text()')[0]
            # Keep only the leading "Doors: H:MM PM" words, drop the label.
            doors = _flat(" ".join(doors.split(" ")[:3]).replace("Doors: ", ""))
            event = {
                'scraper': scraper,
                'calendars': [scraper.calendar],
                # Listing omits the year; assume the current one.
                'date': " ".join([day, str(datetime.now().year), doors, tz_str]),
                'title': _flat(card.xpath('.//*/a[@id="eventTitle"]/h2/text()')[0]),
                'link': card.xpath('.//*/a[@id="eventTitle"]/@href')[0],
            }
            try:
                event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
            except:
                event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT_2)
            digitools.createBasicEvent(event, "Mu", venue)
            scraper.items += 1
            print("\nSuccess\n")
        except Exception as e:
            print("\nError: ", e)
# Entry point: requires a run-environment argument to pick the browser.
if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    # BUG FIX: the original called br.close() here, but `br` is never created
    # on this path (NameError). Just report and exit, as the other scrapers do.
    print("No run_env")
    quit()

ps = digitools.getSource(br, venue.website)
get_events(ps, "Mu")
sleep(3)
digitools.updateScraper(scraper, item_count_start)
br.close()

View File

@@ -0,0 +1,67 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta
# Make the shared scraper bootstrap importable.
sys.path.append('/var/www/digisnaxx.ado/scrapers')
import dtss
# Prepares the runtime (Django settings/setup) so the events.models imports
# below resolve -- must run before them.
dtss.getReady()
from time import sleep
from pprint import pprint as ppr
import pytz
from events.models import Organization, Scraper, Calendar, Event
import events.digitools as digitools

# Fixed Pacific offset appended to every scraped date string.
tz_str = "-0800 UTC"
DATETIME_FORMAT = '%b %d, %Y %I:%M %p %z %Z'        # e.g. "Mar 1, 2024 8:00 PM -0800 UTC"
# NOTE(review): DATETIME_FORMAT_2 is never used in this file -- dead or a
# copy-paste leftover from the Roseland scraper; confirm before removing.
DATETIME_FORMAT_2 = '%a, %b %d %Y %I %p %z %Z'

# NOTE(review): city is set to "Portland" and the scraper is filed under
# 'pdx', but confirm that matches this venue's actual location.
venue, created = Organization.objects.get_or_create(
    name="Volcanic Theater",
    city="Portland",
    website="https://www.volcanictheatre.com/",
    is_venue = True
)
scraper, item_count_start, virtcal = digitools.getScraper(venue, venue.website, 'pdx')
# Reset the per-run item counter before scraping.
scraper.items = 0
scraper.save()
def get_events(ps, event_type):
    """Create a "Mu" event for each Volcanic Theater card on the parsed page *ps*.

    Parsed cards are handed to ``digitools.createBasicEvent`` and counted on
    ``scraper.items``; per-card failures are logged and skipped.

    Fixes vs. the original: ``xpath()`` returns a *list*, so title and link
    now take the first match (``[0]``) like every sibling scraper, and a card
    whose date cannot be parsed is skipped instead of being saved without a
    ``dateStamp``.
    """
    contents = ps.xpath('.//*/div[@class="event-card"]')
    for c in contents:
        try:
            event = {}
            date = c.xpath('.//*/div[@class="date"]/text()')[0]
            time = c.xpath('.//*/div[@class="doors-open"]/div/text()')[1]
            event['date'] = (' ').join([date, time, tz_str])
            event['scraper'] = scraper
            event['calendars'] = [scraper.calendar]
            # BUG FIX: take the first matched node so title/link are strings,
            # not lxml result lists.
            event['title'] = c.xpath('.//div[@class="headline"]/text()')[0]
            try:
                event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
            except Exception as e:
                print(e)
                # No usable timestamp: skip rather than create a dateless event.
                continue
            event['link'] = c.xpath('.//a/@href')[0]
            digitools.createBasicEvent(event, "Mu", venue)
            scraper.items += 1
        except Exception as e:
            print("\nError: ", e)
# Entry point: requires a run-environment argument to pick the browser.
if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    # BUG FIX: the original called br.close() here, but `br` is never created
    # on this path (NameError). Just report and exit, as the other scrapers do.
    print("No run_env")
    quit()

ps = digitools.getSource(br, venue.website)
get_events(ps, "Mu")
sleep(3)
digitools.updateScraper(scraper, item_count_start)
br.close()

View File

@@ -1,7 +1,5 @@
#!/bin/bash
BASEDIR=/var/www/digisnaxx.ado/api.digisnaxx
EVENTSDIR=/var/www/digisnaxx.ado/api.digisnaxx/events
SCRIPTDIR=/var/www/digisnaxx.ado/api.digisnaxx/events/scrapers/scripts
GOVTDIR=/var/www/digisnaxx.ado/api.digisnaxx/events/scrapers/Working/govt

View File

@@ -1,7 +1,5 @@
#!/bin/bash
BASEDIR=/var/www/digisnaxx.ado/api.digisnaxx
DJANGODIR=/var/www/digisnaxx.ado/api.digisnaxx/events
SCRIPTDIR=/var/www/digisnaxx.ado/api.digisnaxx/events/scrapers/scripts
ICALDIR=/var/www/digisnaxx.ado/api.digisnaxx/events/scrapers/Working/iCal

View File

@@ -1,10 +1,9 @@
import re, os, sys
from datetime import datetime, timedelta
import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'config.django.local'
django.setup()
sys.path.append('/var/www/digisnaxx.ado/scrapers')
import dtss
dtss.getReady()
from events.models import Event,Organization, Promo, Calendar