first push

This commit is contained in:
2025-11-30 16:29:50 -05:00
commit d5055374b4
77 changed files with 9129 additions and 0 deletions

View File

@@ -0,0 +1,78 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta
# import django
# sys.path.append('../../../../')
sys.path.append('/var/www/digisnaxx.ado/scrapers')
import digitools as digitools_sm
print("Got DigiTools")
digitools_sm.getReady()
# os.environ['DJANGO_SETTINGS_MODULE'] = 'config.django.local'
# django.setup()
from time import sleep
from pprint import pprint as ppr
import pytz
import events.digitools as digitools
from events.models import Organization, Scraper, Calendar, Event
venue, created = Organization.objects.get_or_create(
name="Acme Comedy Club",
city="Minneapolis",
website="https://acmecomedycompany.com/the-club/calendar/",
is_venue = True
)
scraper,item_count_start,virtcal = digitools.getScraper(venue, 'msp')
tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%b %d %Y %I:%M %p'
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'
def get_events(ps, event_type):
contents = ps.xpath('.//*/li[@class="event"]')
for c in contents:
try:
event = {}
day = c.xpath('.//*/span[@class="day"]/text()')[0]
month = c.xpath('.//*/span[@class="mth"]/text()')[0]
year = datetime.now().year
if month == "Jan":
year = int(year) + 1
event['scraper'] = scraper
event['calendars'] = [scraper.calendar]
event['title'] = c.xpath('.//*/span[@class="event_title"]/a/text()')[0]
event['date'] = [month, day, str(year), c.xpath('.//*/span[@class="event_time"]/text()')[0].strip()]
event['date'] = " ".join(event['date'])
event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
event['link'] = c.xpath('.//*/span[@class="event_title"]/a/@href')[0]
digitools.createBasicEvent(event, "Co", venue)
scraper.items+=1
except Exception as e:
print(e)
ppr(event)
print("\n\n+++\n\n")
if len(sys.argv) >= 2:
arg1 = sys.argv[1]
br = digitools.getBrowser(arg1)
else:
print("No run_env")
br.close()
quit()
links = digitools.createURL("https://acmecomedycompany.com/the-club/calendar/")
for link in links:
ps = digitools.getSource(br, link)
get_events(ps, "Co")
digitools.updateScraper(scraper, item_count_start)
br.close()

View File

@@ -0,0 +1,67 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta
import django
sys.path.append('../../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'config.django.local'
django.setup()
from time import sleep
from pprint import pprint as ppr
import pytz
from events.models import Organization, Scraper, Calendar
import events.digitools as digitools
venue, created = Organization.objects.get_or_create(
name="Amsterdam Bar & Hall",
city="St. Paul",
website="https://www.amsterdambarandhall.com/events-new/",
is_venue=True
)
scraper,item_count_start,virtcal = digitools.getScraper(venue, 'msp')
DATETIME_FORMAT = '%B %d %Y %I:%M%p'
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'
def get_events(ps):
contents = ps.xpath('.//*/ul[@class="events-list"]/li')
for c in contents:
try:
event = {}
day = c.xpath('.//*/div[@class="date-day"]/text()')[0]
month = c.xpath('.//*/div[@class="date-month"]/text()')[0]
year = datetime.now().year
event['scraper'] = scraper
event['calendars'] = [scraper.calendar]
event['title'] = c.xpath('.//div/h4/a/text()')[0]
event['date'] = [month, day, str(year), c.xpath('.//div[@class="event-info"]/p/text()')[0].split(" ")[0]]
event['date'] = " ".join(event['date'])
event['dateStamp'] =datetime.strptime(event['date'], DATETIME_FORMAT)
event['link'] = c.xpath('.//div[@class="event-info"]/h4/a/@href')[0]
if " presents" in event['title']:
event['title'] = event['title'].split("presents")[1][1:].strip()
if event['title'].startswith('.'):
print("BLAHH\n")
event['title'] = event['title'][1:].strip()
digitools.createBasicEvent(event, "Mu", venue)
scraper.items+=1
except Exception as e:
print(e)
if len(sys.argv) >= 2:
arg1 = sys.argv[1]
br = digitools.getBrowser(arg1)
else:
print("No run_env")
quit()
ps = digitools.getSource(br, venue.website)
get_events(ps)
sleep(3)
br.close()
digitools.updateScraper(scraper, item_count_start)

View File

@@ -0,0 +1,178 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta
import django
sys.path.append('../../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'config.django.local'
django.setup()
from time import sleep
from pprint import pprint as ppr
from lxml import html
import pytz
from events.models import Organization, Scraper, Event
import events.digitools as digitools
venue, created = Organization.objects.get_or_create(
name="First Avenue",
city="Minneapolis",
website="https://first-avenue.com",
is_venue = True
)
scraper,item_count_start,virtcal = digitools.getScraper(venue, 'msp')
tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%b %d %Y %I%p'
DATETIME_FORMAT_2 = '%b %d %Y %I:%M%p'
DATETIME_FORMAT_3 = '%b %d %Y'
# Set initial variables for City, etc
month = int(datetime.now().month)
day = int(datetime.now().day)
if month == 12:
next_month = "01"
else:
next_month = month + 1
if next_month < 10:
next_month = "0" + str(next_month)
if month < 10:
month = "0" + str(month)
year = int(datetime.now().year)
calendar_url = 'https://first-avenue.com/shows/?start_date=' + str(year) + str(month) + str(day)
next_month_string = str(next_month) + "01"
if next_month == 1:
calendar_url_2 = 'https://first-avenue.com/shows/?start_date=' + str(year + 1) + next_month_string
else:
if int(next_month) == 1:
calendar_url_2 = 'https://first-avenue.com/shows/?start_date=' + str(year + 1) + next_month_string
else:
calendar_url_2 = 'https://first-avenue.com/shows/?start_date=' + str(year) + next_month_string
print("\n\n", calendar_url, calendar_url_2, "\n\n")
if len(sys.argv) >= 2:
arg1 = sys.argv[1]
br = digitools.getBrowser(arg1)
else:
print("No run_env")
quit()
if datetime.now().day < 8:
ps = digitools.getSource(br, calendar_url)
shows = ps.xpath('.//*/div[@class="show_name content flex-fill"]/div/div/h4/a/@href')[:63]
elif 7 < datetime.now().day < 15:
ps = digitools.getSource(br, calendar_url)
shows = ps.xpath('.//*/div[@class="show_name content flex-fill"]/div/div/h4/a/@href')
elif 14 < datetime.now().day < 21:
ps = digitools.getSource(br, calendar_url)
shows = ps.xpath('.//*/div[@class="show_name content flex-fill"]/div/div/h4/a/@href')[:95]
ps = digitools.getSource(br, calendar_url_2)
shows = shows + ps.xpath('.//*/div[@class="show_name content flex-fill"]/div/div/h4/a/@href')[:31]
else:
ps = digitools.getSource(br, calendar_url)
shows = ps.xpath('.//*/div[@class="show_name content flex-fill"]/div/div/h4/a/@href')
ps = digitools.getSource(br, calendar_url_2)
shows = shows + ps.xpath('.//*/div[@class="show_name content flex-fill"]/div/div/h4/a/@href')[:63]
events = []
def get_info(pse):
event = {}
event['scraper'] = scraper
event['calendars'] = [scraper.calendar]
event["venue"] = pse.xpath('.//*/div[@class="content"]/div/div[@class="venue_name"]/text()')[0].replace('\t', '').replace('\n', '').strip()
event["show_title"] = pse.xpath('.//*/span[@class="show_title"]/text()')[0].replace('\t', '').replace('\n', '')
if event["show_title"] == "":
event["show_title"] = pse.xpath('.//*/span[@class="show_title"]/text()')[2].replace('\t', '').replace('\n', '')
event["guests"] = pse.xpath('.//*/div[@class="feature_details_main d-flex align-items-center"]/div/h4/text()')
event["flyer"] = pse.xpath('.//*/img[@class="gig_poster lazy loaded"]/@src')
try:
event = get_date(pse, event)
except Exception as e:
print("date issue: ", e)
try:
event = get_details(pse, event)
except Exception as e:
print("details issue: ", e)
try:
event["date_time"] = datetime.strptime(" ".join(event["date"]) + " " + event["details"]["Doors Open"], DATETIME_FORMAT)
except Exception as e:
print("Using alt date format 2: ", e)
try:
event["date_time"] = datetime.strptime(" ".join(event["date"]) + " " + event["details"]["Doors Open"], DATETIME_FORMAT_2)
ppr(event)
except Exception as e:
print("Using alt date format 3: ", e)
print(event['date'])
event["date_time"] = datetime.strptime(" ".join(event["date"]), DATETIME_FORMAT_3)
return event
def get_date(pse, event):
month = pse.xpath('.//*/div[@class="date_container"]/div/div[@class="month"]/text()')[0].replace('\t', '').replace('\n', '')
day = pse.xpath('.//*/div[@class="date_container"]/div/div[@class="day"]/text()')[0].replace('\t', '').replace('\n', '')
year = pse.xpath('.//*/div[@class="date_container"]/div/div[@class="year"]/text()')[0].replace('\t', '').replace('\n', '')
event["date"] = [month, day, year]
return event
def get_details(pse, event):
try:
details = pse.xpath('.//*/div[@class="show_details text-center"]/div/div/h6/text()')
info = pse.xpath('.//*/div[@class="show_details text-center"]/div/div/h2/text()')
di = zip(details, info)
details = {}
for d,i in di:
details[d] = i
event["details"] = details
return event
except Exception as e:
print("details issue: ", e)
for show in shows:
br.get(show)
sleep(2)
try:
pse = html.fromstring(br.page_source)
except Exception as e:
print(show)
pass
try:
event = get_info(pse)
except Exception as e:
print("get_info error: ", e)
try:
event["link"] = show
if event["venue"] in ["Palace Theater", "Turf Club", "The Fitzgerald Theater", "Amsterdam Bar & Hall"]:
venue, created = Organization.objects.get_or_create(name=event["venue"], is_venue=True, city="St. Paul")
else:
venue, created = Organization.objects.get_or_create(name=event["venue"], is_venue=True, city="Minneapolis")
except Exception as e:
print("Venue creation error: ", e, "\n", event, "\n", event["venue"])
try:
event['dateStamp'] = event['date_time']
event['scraper'] = scraper
new_event, created = digitools.createDetailedEvent(event, "Mu", venue, scraper)
scraper.items+=1
except Exception as e:
print("event creation error: ", e, "\n\n", event, "\n\n", created)
quit()
ppr(events)
br.close()
digitools.updateScraper(scraper, item_count_start)
# br.find_element_by_class_name('fc-btn_allCalendars-button').click()

View File

@@ -0,0 +1,68 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta
import django
sys.path.append('../../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'config.django.local'
django.setup()
from time import sleep
from pprint import pprint as ppr
import pytz
from events.models import Organization, Scraper
import events.digitools as digitools
venue, created = Organization.objects.get_or_create(
name="Ginkgo Coffee",
city="Saint Paul",
website="https://ginkgocoffee.com/events/",
is_venue = True
)
scraper,item_count_start,virtcal = digitools.getScraper(venue, 'msp')
event_type = ""
# Time Signatures
tz = pytz.timezone("US/Central")
DATETIME_FORMAT_2 = '%b %d %Y %I:%M %p'
DATETIME_FORMAT = '%B %d @ %I:%M %p %Y'
def get_events(ps, event_type):
contents = ps.xpath('.//*/article')
for c in contents:
try:
event = {}
dateTime = c.xpath('.//*/span[@class="tribe-event-date-start"]/text()')[0]
month = c.xpath('.//*/span[@class="tribe-event-date-start"]/text()')[0].split(' ')[0]
year = datetime.now().year
if month == "January":
year = int(year) + 1
event['scraper'] = scraper
event['calendars'] = [scraper.calendar]
event['title'] = c.xpath('.//*/h3/a/text()')[0].replace("\n", "").replace("\t", "")
event['date'] = " ".join([ dateTime, str(year)])
event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
event['link'] = c.xpath('.//*/h3/a/@href')[0]
digitools.createBasicEvent(event, event_type, venue)
except Exception as e:
print(e)
ppr(event)
print("\n\n+++\n\n")
if len(sys.argv) >= 2:
arg1 = sys.argv[1]
br = digitools.getBrowser(arg1)
else:
print("No run_env")
quit()
ps = digitools.getSource(br, venue.website)
get_events(ps, "Mu")
sleep(3)
br.close()
digitools.updateScraper(scraper, item_count_start)

View File

@@ -0,0 +1,71 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta
import django
sys.path.append('../../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'config.django.local'
django.setup()
from time import sleep
from pprint import pprint as ppr
import pytz
from events.models import Organization, Scraper
import events.digitools as digitools
venue, created = Organization.objects.get_or_create(
name="Green Room",
city="Minneapolis",
website="https://www.greenroommn.com/events",
is_venue = True
)
scraper,item_count_start,virtcal = digitools.getScraper(venue, 'msp')
event_type = "Mu"
# Time Signatures
tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%a %b %d %Y %I:%M %p'
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'
def get_events(ps, event_type):
contents = ps.xpath('.//*/div[@class="vp-event-card vp-venue-greenroom vp-col"]')
for c in contents:
try:
event = {}
time = c.xpath('.//*/span[@class="vp-time"]/text()')[0].strip()
date = c.xpath('.//*/span[@class="vp-date"]/text()')[0].strip()
month = date.split(" ")[1]
year = datetime.now().year
# if month == "Jan":
# year = int(year) + 1
event['scraper'] = scraper
event['calendars'] = [scraper.calendar]
event['title'] = c.xpath('.//*/div[@class="vp-event-name"]/text()')[0]
event['datetime'] = date + " " + str(year) + " " + time
event['dateStamp'] = datetime.strptime(event['datetime'], DATETIME_FORMAT)
event['link'] = venue.website + c.xpath('.//a[@class="vp-event-link"]/@href')[0]
digitools.createBasicEvent(event, event_type, venue)
scraper.items+=1
except Exception as e:
print(e)
ppr(event)
print("\n+++\n")
if len(sys.argv) >= 2:
arg1 = sys.argv[1]
br = digitools.getBrowser(arg1)
else:
print("No run_env")
quit()
ps = digitools.getSource(br, venue.website)
get_events(ps, event_type)
sleep(3)
br.close()
digitools.updateScraper(scraper, item_count_start)

View File

@@ -0,0 +1,96 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta
import django
sys.path.append('../../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'config.django.local'
django.setup()
from time import sleep
from pprint import pprint as ppr
import pytz
from events.models import Organization, Scraper, Event
import events.digitools as digitools
from lxml import html
count = 0
venue, created = Organization.objects.get_or_create(
name="Hook & Ladder",
city="Minneapolis",
website="https://thehookmpls.com",
is_venue=True,
)
scraper,item_count_start,virtcal = digitools.getScraper(venue, 'msp')
ppr(scraper)
tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%a, %b %d, %Y @ %I:%M %p'
# Set initial variables for City, etc
calendar_url = [
"https://thehookmpls.com/events/list/page/1",
"https://thehookmpls.com/events/list/page/2",
"https://thehookmpls.com/events/list/page/3"
]
if len(sys.argv) >= 2:
arg1 = sys.argv[1]
br = digitools.getBrowser(arg1)
else:
print("No run_env")
quit()
def get_listings(pse, events):
nevents = pse.xpath('.//*/article')
for event in nevents:
e = {}
e['datetime'] = event.xpath('.//*/span[@class="tribe-event-date-start"]/text()')[0]
e['show_title'] = event.xpath('.//*/header/h2/a/@title')[0]
e['link'] = event.xpath('.//*/header/h2/a/@href')[0]
try:
e['subtitle'] = event.xpath('.//*/header/div[@class="eventSubHead"]/text()')[0]
except:
continue
try:
e['price'] = event.xpath('.//*/span[@class="tribe-events-c-small-cta__price"]/strong/text()')[0].replace("Tickets ", "")
except:
e['price'] = "See Link"
e['image'] = event.xpath('.//*/img/@data-src')[0]
e["date_time"] = datetime.strptime(e['datetime'], DATETIME_FORMAT)
e['scraper'] = scraper
e['calendar'] = [scraper.calendar]
events.append(e)
events = []
for cal in calendar_url:
br.get(cal)
sleep(3)
pse = html.fromstring(br.page_source)
get_listings(pse, events)
for event in events:
try:
new_event = Event.objects.update_or_create(
scraper = event['scraper'],
event_type = 'Mu',
show_title = event["show_title"],
show_link = event["link"],
show_date = event["date_time"],
show_day = event["date_time"],
guests = " ".join(event["subtitle"]),
venue = venue
)
digitools.add_calendar(new_event, 'msp')
except Exception as e:
print("oops ", e, "\n\n", "Scraper:", scraper)
br.close()
digitools.updateScraper(scraper, item_count_start)

View File

@@ -0,0 +1,70 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta
import django
sys.path.append('../../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'config.django.local'
django.setup()
from time import sleep
from pprint import pprint as ppr
import pytz
from events.models import Organization, Scraper
import events.digitools as digitools
venue, created = Organization.objects.get_or_create(
name="Magers & Quinn",
city="Minneapolis",
website="https://www.magersandquinn.com/events",
is_venue=False
)
scraper,item_count_start,virtcal = digitools.getScraper(venue, 'msp')
DATETIME_FORMAT = '%A, %B %d , %Y %I:%M %p'
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'
def get_events(ps, event_type):
contents = ps.xpath('.//*/div[@class="day has-event"]')
for c in contents:
try:
event = {}
day = c.xpath('.//*/div[@class="dd"]/text()')[0]
month = c.xpath('.//*/div[@class="month"]/text()')[0]
year = c.xpath('.//*/div[@class="year"]/text()')[0]
event['scraper'] = scraper
event['calendars'] = [scraper.calendar]
event['title'] = c.xpath('.//*/h3/text()')[0]
event['date'] = [month, day, year, c.xpath('.//*/p[@class="time"]/text()')[0]]
event['date'] = " ".join(event['date'])
event['dateStamp'] =datetime.strptime(event['date'], DATETIME_FORMAT)
event['link'] = "https://www.magersandquinn.com" + c.xpath('.//a[@class="event in-store"]/@href')[0]
digitools.createBasicEvent(event, "Ed", venue)
scraper.items+=1
except Exception as e:
event['link'] = "https://www.magersandquinn.com" + c.xpath('.//a[@class="event off-site"]/@href')[0]
print("Error: ", e)
ppr(event)
digitools.createBasicEvent(event, "Ed", venue)
print("\n\n+++\n\n")
links = digitools.createBasicURL("https://www.magersandquinn.com/events/")
if len(sys.argv) >= 2:
arg1 = sys.argv[1]
br = digitools.getBrowser(arg1)
else:
print("No run_env")
quit()
for link in links:
ps = digitools.getSource(br, link)
get_events(ps, "Ed")
sleep(3)
# ppr(events)
br.close()
digitools.updateScraper(scraper, item_count_start)

80
Working/venues/MplsVFW.py Normal file
View File

@@ -0,0 +1,80 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta
import django
sys.path.append('../../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'config.django.local'
django.setup()
from time import sleep
from pprint import pprint as ppr
import pytz
from events.models import Organization, Scraper, Event
import events.digitools as digitools
from selenium.webdriver.common.by import By
from lxml import html
venue, created = Organization.objects.get_or_create(
name="Uptown VFW",
city="Minneapolis",
website="https://noboolpresents.com/venues/uptown-vfw/",
is_venue = True
)
scraper,item_count_start, virtcal = digitools.getScraper(venue, 'msp')
tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%a %B %d @ %I:%M %p %Y'
DATETIME_FORMAT_2 = '%b %d %I:%M%p %Y'
DATETIME_FORMAT_3 = '%b %d %Y'
# Set initial variables for City, etc
calendar_url = 'https://noboolpresents.com/venues/uptown-vfw/'
current_year = str(datetime.now().year)
if len(sys.argv) >= 2:
arg1 = sys.argv[1]
br = digitools.getBrowser(arg1)
else:
print("No run_env")
quit()
br.get(calendar_url)
sleep(20)
def getEvents(br):
ps = html.fromstring(br.page_source)
events = ps.xpath('.//*/article')
for event in events:
deets = {}
dateTime = event.xpath('.//*/span[@class="tribe-event-date-start"]/text()')[0].replace("", "").strip() + " " + current_year
title = event.xpath('.//*/h2[@class="alt-font"]/a/text()')[0].replace("\n", "").replace("\t", "")
link = event.xpath('.//*/h2[@class="alt-font"]/a/@href')[0]
# deets["tickets"] = event.xpath('.//*/span[@class="tribe-events-c-small-cta__price"]/strong/text()')[0]
try:
new_event, created = Event.objects.update_or_create(
scraper = scraper,
event_type = 'Mu',
show_title = title,
show_link = link,
show_date = datetime.strptime(dateTime, DATETIME_FORMAT),
show_day = datetime.strptime(dateTime, DATETIME_FORMAT),
# more_details = deets["tickets"],
venue = venue
)
digitools.add_calendar(new_event, 'msp')
scraper.items+=1
except Exception as e:
print("oops", e)
getEvents(br)
br.find_element(By.XPATH, './/*/li[@class="tribe-events-c-nav__list-item tribe-events-c-nav__list-item--next"]/a').click()
sleep(5)
getEvents(br)
br.close()
digitools.updateScraper(scraper, item_count_start)

View File

@@ -0,0 +1,106 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta
import django
sys.path.append('../../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'config.django.local'
django.setup()
from time import sleep
from pprint import pprint as ppr
import pytz
from events.models import Organization, Scraper, Event as DSEvent
import events.digitools as digitools
try:
venue, created = Organization.objects.get_or_create(
name="Parkway Theater",
city="Minneapolis",
website="https://theparkwaytheater.com",
is_venue = True
)
except Exception as e:
venue = Organization.objects.get(name="Parkway Theater")
scraper,item_count_start, virtcal = digitools.getScraper(venue, 'msp')
tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%b %d, %Y %I:%M %p'
def get_events(ps, event_type):
contents = ps.xpath('.//*/div[@class="summary-content sqs-gallery-meta-container"]')
img_etc = ps.xpath('.//*/div[@class="summary-thumbnail-outer-container"]/a/div/img/@src')
ps.xpath('.//*/span[@class="event-time-12hr"]/text()')
for c,i in zip(contents,img_etc):
try:
event = {}
event['calendars'] = [scraper.calendar]
event['title'] = c.xpath('.//*/a[@class="summary-title-link"]/text()')[0]
event['link'] = "https://theparkwaytheater.com" + c.xpath('.//*/a[@class="summary-title-link"]/@href')[0]
event['date'] = c.xpath('.//div/div/time/text()')[0] + " " + c.xpath('.//*/span[@class="event-time-12hr"]/text()')[0].split("")[0].strip()
event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
event['desc'] = c.xpath('.//*/p/text()')[0]
event['img_link'] = i
event['details'] = {
'description': event['desc'],
'img_link': event['img_link'],
}
try:
new_event, created = DSEvent.objects.update_or_create(
scraper = scraper,
event_type = event_type,
show_title = event['title'],
show_link = event['link'],
show_date = datetime.strptime(event['date'], DATETIME_FORMAT),
show_day = datetime.strptime(event['date'], DATETIME_FORMAT),
more_details = event["details"],
venue = venue
)
digitools.add_calendar(new_event, 'msp')
scraper.items+=1
except Exception as e:
try:
event['date'] = c.xpath('.//div/div/time/text()')[0].split("")[0] + " " + c.xpath('.//*/span[@class="event-time-12hr"]/text()')[0].split("")[0].strip()
event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
new_event, created = DSEvent.objects.update_or_create(
scraper = scraper,
event_type = event_type,
show_title = event['title'],
show_link = event['link'],
show_date = datetime.strptime(event['date'], DATETIME_FORMAT),
show_day = datetime.strptime(event['date'], DATETIME_FORMAT),
more_details = event["details"],
venue = venue
)
digitools.add_calendar(new_event, 'msp')
scraper.items+=1
except Exception as e:
print(e)
print("\n\n+++\n\n")
except Exception as e:
continue
if len(sys.argv) >= 2:
arg1 = sys.argv[1]
br = digitools.getBrowser(arg1)
else:
print("No run_env")
quit()
calendar_url = 'https://theparkwaytheater.com/live-events'
ps = digitools.getSource(br, calendar_url)
get_events(ps, "Mu")
calendar_url = "https://theparkwaytheater.com/movies"
ps = digitools.getSource(br, calendar_url)
get_events(ps, "Th")
# ppr(events)
br.close()
digitools.updateScraper(scraper, item_count_start)

View File

@@ -0,0 +1,97 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta
import django
sys.path.append('../../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'config.django.local'
django.setup()
from time import sleep
from pprint import pprint as ppr
import pytz
from events.models import Organization, Scraper
import events.digitools as digitools
venue, created = Organization.objects.get_or_create(
name="St Paul Chamber Orchestra",
city="St Paul",
website="https://thespco.org",
is_venue = False
)
scraper,item_count_start, virtcal = digitools.getScraper(venue, 'msp')
# Time Signatures
tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%A, %B %d, %Y %I:%M %p'
def get_events(ps, event_type):
contents = ps.xpath('.//*/div[@class="event-title"]/a/@href')
for c in set(contents):
try:
link = 'https://content.thespco.org' + c
ps = digitools.getSource(br, link)
ntitle = ps.xpath('.//*/article/h1/text()')
subtitle = ps.xpath('.//*/article/h1/em/text()')
event = {}
event['scraper'] = scraper
event['calendars'] = [scraper.calendar]
if len(subtitle) == 1:
if len(ntitle) == 2:
title = ntitle[0] + subtitle[0] + ntitle[1]
elif ntitle[0].startswith(" "):
title = subtitle[0] + ntitle[0]
else:
title = ntitle[0] + subtitle[0]
else:
title = ntitle[0]
events = ps.xpath('.//*/div[@class="day"]')
for e in events:
new_venue = e.xpath('.//*/strong[@class="venue"]/text()')[0].strip()
location = e.xpath('.//*/span[@class="location"]/text()')[0].strip()
if 'Minneapolis' in location:
location = 'Minneapolis'
elif 'St. Paul' in location:
location = 'St. Paul'
else:
location = location
venue, created = Organization.objects.get_or_create(
name=new_venue,
city=location,
is_venue = True
)
dateTime = e.xpath('.//*/h3[@class="date"]/text()')[0].replace("\n", "").replace("\t", "").strip()
event['dateStamp'] = datetime.strptime(dateTime, DATETIME_FORMAT)
event['venue'] = venue
event['location'] = location
event['title'] = "SPCO: " + title
event['link'] = link
event_type = "Mu"
digitools.createBasicEvent(event, event_type, venue)
scraper.items+=1
except Exception as e:
print("ERROR: ", e)
print("\n\n+++\n\n")
if len(sys.argv) >= 2:
arg1 = sys.argv[1]
br = digitools.getBrowser(arg1)
else:
print("No run_env")
quit()
# Get Event Page Link(s)
links = digitools.createURLNoZero("https://content.thespco.org/events/calendar/")
for link in links:
ps = digitools.getSource(br, link)
get_events(ps, "Mu")
sleep(3)
br.close()
digitools.updateScraper(scraper, item_count_start)

View File

@@ -0,0 +1,73 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta
import django
sys.path.append('../../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'config.django.local'
django.setup()
from time import sleep
from pprint import pprint as ppr
import pytz
from events.models import Organization, Scraper
import events.digitools as digitools
tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%Y-%m-%d %I:%M %p'
venue, created = Organization.objects.get_or_create(
name="White Squirrel",
city="St. Paul",
website="https://whitesquirrelbar.com",
is_venue = True
)
scraper,item_count_start, virtcal = digitools.getScraper(venue, 'msp')
# Set initial variables for City, etc
calendar_url = [
'https://whitesquirrelbar.com/calendar/list/page/1/',
'https://whitesquirrelbar.com/calendar/list/page/2/',
'https://whitesquirrelbar.com/calendar/list/page/3/'
]
if len(sys.argv) >= 2:
arg1 = sys.argv[1]
br = digitools.getBrowser(arg1)
else:
print("No run_env")
quit()
def get_listings(pse, events):
listings = pse.xpath('.//*/li[@class="tribe-common-g-row tribe-events-calendar-list__event-row"]')
print("get listings", listings)
for l in listings:
event = {}
event['scraper'] = scraper
event['calendars'] = [scraper.calendar]
print("1: ", event)
try:
event["image"] = l.xpath('.//*/img/@src')[0]
except:
event["image"] = "none"
event["date"] = l.xpath('.//time/@datetime')[0]
event["time"] = l.xpath('.//*/span[@class="tribe-event-date-start"]/text()')[0].split("@")[1]
event["title"] = l.xpath('.//*/h3/a/text()')[0].replace("\t", "").replace("\n", "")
event["link"] = l.xpath('.//*/h3/a/@href')[0]
event['datetime'] = event['date'] + " " + event['time']
event["dateStamp"] = datetime.strptime(event['datetime'] , DATETIME_FORMAT)
events.append(event)
digitools.createBasicEvent(event, "Mu", venue)
scraper.items+=1
events = []
for cal in calendar_url:
ps = digitools.getSource(br, cal)
get_listings(ps, events)
br.close()
digitools.updateScraper(scraper, item_count_start)

View File

@@ -0,0 +1,74 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta
import django
sys.path.append('../../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'config.django.local'
django.setup()
from time import sleep
from pprint import pprint as ppr
import pytz
from events.models import Organization, Scraper
import events.digitools as digitools
venue, created = Organization.objects.get_or_create(
name="Cedar Cultural Center",
city="Minneapolis",
website="https://www.thecedar.org",
is_venue=True
)
scraper,item_count_start,virtcal = digitools.getScraper(venue, 'msp')
tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%A, %B %d, %Y %I:%M %p'
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'
DATETIME_FORMAT_3 = '%A, %B %d at %I:%M%p %Y'
DATETIME_FORMAT_4 = '%A, %B %d at %I%p %Y'
DATETIME_FORMAT_5 = '%A, %B %d @%I%p %Y'
def get_events(ps):
links = ps.xpath('.//*/div[@class="summary-title"]/a/@href')
for l in links:
if "cedar-news-blog" in l:
continue
pse = digitools.getSource(br, "https://www.thecedar.org" + l)
event = {}
event['scraper'] = scraper
event['calendars'] = [scraper.calendar]
event['link'] = "https://www.thecedar.org" + l
try:
time = pse.xpath('.//*/time[@class="event-time-localized-start"]/text()')[0]
date = pse.xpath('.//*/time[@class="event-date"]/text()')[0]
event['title'] = pse.xpath('.//*/h1[@class="eventitem-title"]/text()')[0]
except:
try:
time = pse.xpath('.//*/time[@class="event-time-localized"]/text()')[0]
date = pse.xpath('.//*/time[@class="event-date"]/text()')[0]
event['title'] = pse.xpath('.//*/h1[@class="eventitem-title"]/text()')[0]
except Exception as e:
print(e)
print("failed event: ", event)
dateStamp = date + " " + time
event['dateStamp'] = datetime.strptime(dateStamp, DATETIME_FORMAT)
digitools.createBasicEvent(event, "Mu", venue)
scraper.items+=1
if len(sys.argv) >= 2:
arg1 = sys.argv[1]
br = digitools.getBrowser(arg1)
else:
print("No run_env")
quit()
calendar_url = 'https://www.thecedar.org/listing'
ps = digitools.getSource(br, calendar_url)
get_events(ps)
br.close()
digitools.updateScraper(scraper, item_count_start)

View File

@@ -0,0 +1,105 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta
import django
sys.path.append('../../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'config.django.local'
django.setup()
from time import sleep
from pprint import pprint as ppr
import pytz
from selenium.webdriver.common.by import By
from lxml import html
from events.models import Organization, Scraper, Event
import events.digitools as digitools
venue, created = Organization.objects.get_or_create(
name="Club 331",
city="Minneapolis",
website="https://331club.com",
is_venue=True,
)
scraper,item_count_start,virtcal = digitools.getScraper(venue, 'msp')
tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%b %d %I%p %Y'
DATETIME_FORMAT_2 = '%b %d %I:%M%p %Y'
DATETIME_FORMAT_3 = '%b %d %Y'
# Set initial variables for City, etc
calendar_url = 'https://331club.com/#calendar'
current_year = str(datetime.now().year)
if len(sys.argv) >= 2:
arg1 = sys.argv[1]
br = digitools.getBrowser(arg1)
else:
print("No run_env")
quit()
br.get(calendar_url)
sleep(3)
br.find_element(By.CLASS_NAME, 'more_events').click()
sleep(2)
ps = html.fromstring(br.page_source)
sleep(3)
dates = ps.xpath('.//*/div[@class="event"]')
dates = dates + ps.xpath('.//*/div[@class="event hidden"]')
def process_times(times):
# print("Times: ", times)
time = []
for t in times:
t = t.replace("\n", "").replace("TBA", "")
if len(t) > 0 and t.endswith("pm"):
if "-" in t:
t = t.split("-")[0] + "pm"
time.append(t)
return time
events = []
for d in dates:
event_date = d.xpath('.//div[@class="event-date"]/span/text()')[:2]
cols = d.xpath('.//div[@class="column"]')
for c in cols:
bands = c.xpath('.//p/a/text()')
links = c.xpath('.//p/a/@href')
time = process_times(c.xpath('.//p/text()'))
event = {}
event["datetime"] = event_date + time + [current_year]
try:
event["date_time"] = datetime.strptime(" ".join(event["datetime"]), DATETIME_FORMAT)
except:
try:
event["date_time"] = datetime.strptime(" ".join(event["datetime"]), DATETIME_FORMAT_2)
except:
try:
event["date_time"] = datetime.strptime(" ".join(event["datetime"]), DATETIME_FORMAT_3)
except:
event["date_time"] = "Invalid"
event["bands"] = (", ").join(bands)
if len(bands) > 0:
event['scraper'] = scraper
event['calendars'] = [scraper.calendar]
event['title'] = event["bands"]
event['date'] = event["date_time"]
event['dateStamp'] = event["date_time"]
event['link'] = "https://331club.com/#calendar"
try:
digitools.createBasicEvent(event, "Mu", venue)
except Exception as e:
print('oops', e)
events.append(event)
br.close()
digitools.updateScraper(scraper, item_count_start)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,169 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta
import json
from selenium.webdriver.common.by import By
from lxml import html
import django
sys.path.append('../../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'config.django.local'
django.setup()
from time import sleep
from pprint import pprint as ppr
import pytz
from events.models import Organization, Scraper, Calendar
import events.digitools as digitools
venue, created = Organization.objects.get_or_create(
name="Events Medellin",
city="Medellin",
website="https://eventario.co/events-category/social/",
is_venue=True
)
scraper,item_count_start,virtcal = digitools.getScraper(venue, 'mde')
DATETIME_FORMAT = '%B %d %Y %I:%M%p'
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'
# with open('data.json') as f:
# totalLinks = json.load(f)
def getLinks(br, url, links):
br.get(url)
sleep(2)
br.execute_script("window.scrollTo(0, window.scrollY + 1500)")
sleep(2)
x = 1
while x == 1:
try:
div = br.find_element(By.XPATH, ".//*/div[@class='feed-pagination flexify']/a")
div.click()
sleep(2)
br.execute_script("window.scrollTo(0, window.scrollY + 1100)")
sleep(2)
except:
x = 0
ps = html.fromstring(br.page_source)
newLinks = []
# newlinks = ps.xpath('.//*/div[@class="e-con-inner"]/*/a/@href')
events = ps.xpath('.//*/div[@class="e-con-inner"]')
for event in events:
e = {}
try:
e['link'] = event.xpath('.//*/a/@href')[0]
e['title'] = event.xpath('.//*/h3/a/text()')[0]
e['venue'] = event.xpath('.//*/ul/li/a/text()')[-1:][0].replace('\n', '').replace('\t', '')
# e['venue'] = event.xpath('.//*/ul/li/a/text()')[-1:][0]
e['venueLink'] = event.xpath('.//*/ul/li/a/@href')[1]
label= event.xpath('.//*/li[@class="elementor-repeater-item-46edd7d flexify ts-action"]/div/text()')
e['label'] = ''.join([x.replace('\t', '').replace('\n', '') for x in label]).strip()
newLinks.append(e)
except Exception as e:
print("Error: ", e)
links = links + newLinks
return links
if len(sys.argv) >= 2:
arg1 = sys.argv[1]
br = digitools.getBrowser(arg1)
else:
print("No run_env")
quit()
urls = ["https://eventario.co/events-category/social/", "https://eventario.co/events-category/musica/", "https://eventario.co/events-category/cultura/"]
allLinks = []
for url in urls:
allLinks = getLinks(br, url, allLinks)
totalLinks = list({v['title']:v for v in allLinks}.values())
ppr(len(totalLinks))
# sortedlinks = allLinks.sort()
# ppr(sortedlinks)
for event in totalLinks:
br.get(event['link'])
sleep(2)
ps = html.fromstring(br.page_source)
dateTime= ps.xpath('.//*/div[@class="elementor-element elementor-element-d9beb21 elementor-widget elementor-widget-heading"]/span[@class="elementor-heading-title elementor-size-default"]/text()')
event['dateTime'] = [x[3:].split('-')[0].strip() for x in dateTime]
locations = ps.xpath('.//*/div[@class="elementor-element elementor-element-f04aae3 elementor-widget__width-initial elementor-widget-mobile__width-initial elementor-widget elementor-widget-ts-advanced-list"]/*/li[@class="elementor-repeater-item-138dbed flexify ts-action"]/a/text()')
location = [x.replace('\t', '').replace('\n', '') for x in locations]
if len(location) == 2:
event['location'] = "{0}, {1}".format(location[1], location[0])
else:
try:
event['location'] = location[0]
except:
event['location'] = 'n/a'
address= ps.xpath('.//*/ul[@class="flexify simplify-ul ts-advanced-list"]/li[@class="elementor-repeater-item-842568d flexify ts-action"]/div/text()')
try:
event['address'] = [x for x in address if 'Capacidad' not in x and '$' not in x][0]
except:
event['address'] = address
# ppr(event)
sleep(2)
br.close()
data = totalLinks
print("Set:", len(totalLinks))
paisa = []
for d in data:
if len(d['dateTime']) != 0:
if 'Familia' in d['label']:
d['category'] = 'Ot'
elif 'Comedia' in d['label']:
d['category'] = 'Co'
elif ('Magic' in d['title']) or ('Juegos' in d['label']):
d['category'] = 'Ot'
elif ('Conferencias' in d['label']) or ('Intercambio' in d['label']):
d['category'] = 'Ed'
else:
d['category'] = 'Mu'
if "Antioquia" in d['location']:
try:
d['city'] = d['location'].split(',')[0]
paisa.append(d)
except:
continue
cal = Calendar.objects.get(shortcode='mde')
for d in paisa:
d['dateStamp'] =datetime.strptime(d['dateTime'][0], DATETIME_FORMAT)
try:
nvenue, created = Organization.objects.get_or_create(
name=d['venue'],
city=d['city'],
website=d['venueLink'],
address_complete = d['address'],
is_venue=True
)
except:
nvenue = Organization.objects.get(name=d['venue'])
nvenue.address_complete = d['address']
nvenue.save()
new_event, created = Event.objects.update_or_create(
event_type = d['category'],
show_title = d['title'],
show_link = d['link'],
show_date = d['dateStamp'],
show_day = d['dateStamp'],
scraper = scraper,
venue = nvenue
)
new_event.calendar.add(cal)
new_event.save()
print(new_event)
digitools.updateScraper(scraper, item_count_start)