first commit

This commit is contained in:
2025-10-11 03:50:49 -05:00
parent fcdef3ffe1
commit 9e9668172c
353 changed files with 47535 additions and 0 deletions

View File

@@ -0,0 +1,147 @@
# Install Chromedriver and Quarantine
# xattr -d com.apple.quarantine <name-of-executable>
import os, sys
from datetime import datetime
from dateutil import relativedelta
import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()
from time import sleep
from pprint import pprint as ppr
from selenium import webdriver as wd
from xvfbwrapper import Xvfb
from lxml import html
import pytz
from events.models import Event, Organization, Scraper
from events.digitools import getBrowser, createURL, createBasicEvent, getSource
try:
scraper, created = Scraper.objects.get_or_create(
name="MN Legislature",
website="https://www.leg.mn.gov/cal?type=all",
items = 0,
last_ran = datetime.now(),
)
except Exception as e:
print(e)
scraper = Scraper.objects.get(name="MN Legislature")
print("Scraper: ", scraper)
tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%A, %B %d, %Y %I:%M %p'
# Set initial variables for City, etc
calendar_url = 'https://www.leg.mn.gov/cal?type=all'
if len(sys.argv) >= 2:
arg1 = sys.argv[1]
br = getBrowser(arg1)
else:
print("No run_env")
quit()
br.get(calendar_url)
sleep(60)
ps = html.fromstring(br.page_source)
commEvents = ps.xpath('.//*/div[@class="card border-dark comm_item cal_item ml-lg-3"]')
senateEvents = ps.xpath('.//*/div[@class="card border-dark senate_item cal_item ml-lg-3"]')
houseEvents = ps.xpath('.//*/div[@class="card border-dark house_item cal_item ml-lg-3"]')
meetings = []
for hE in houseEvents:
details = {}
dateTime = hE.xpath('.//*/b/text()')[0]
try:
title = hE.xpath('.//*/h3/a/text()')[0]
except:
title = hE.xpath('.//*/h3/text()')[0]
try:
link = "https://www.leg.mn.gov/" + hE.xpath('.//*/div[@class="float-right text-center mr-2 d-print-none"]/a/@href')[0]
except:
link = hE.xpath('.//*/h3/a/@href')[0]
details['location'] = hE.xpath('.//*/div[@class=""]/text()')[0]
# print(dateTime, title, link, details['location'])
venue, created = Organization.objects.get_or_create(name="MN House", city="St. Paul")
new_event, created = Event.objects.update_or_create(
calendar = 'msp'
event_type = 'Gv',
show_title = title,
show_link = link,
show_date = datetime.strptime(dateTime, DATETIME_FORMAT),
show_day = datetime.strptime(dateTime, DATETIME_FORMAT).date(),
more_details = details['location'],
venue = venue
)
scraper.items+=1
for sE in senateEvents:
details = {}
dateTime = sE.xpath('.//*/b/text()')[0]
try:
title = sE.xpath('.//*/h3/a/text()')[0]
except:
title = sE.xpath('.//*/h3/text()')[0]
try:
link = "https://www.leg.mn.gov/" + sE.xpath('.//*/div[@class="float-right text-center mr-2"]/a/@href')[0]
except:
link = sE.xpath('.//*/h3/a/@href')[0]
location_list = sE.xpath('.//*/text()')
if 'Location: ' in location_list:
iN = location_list.index("Location: ")
details['location'] = location_list[iN + 1]
elif 'Senate Floor Session' in location_list:
details['location'] = 'Senate Floor Session'
venue, created = Organization.objects.get_or_create(name="MN Senate", city="St. Paul")
new_event = Event.objects.update_or_create(
event_type = 'Gv',
show_title = title,
show_link = link,
show_date = datetime.strptime(dateTime, DATETIME_FORMAT),
show_day = datetime.strptime(dateTime, DATETIME_FORMAT).date(),
more_details = details['location'],
venue = venue
)
scraper.items+=1
for cE in commEvents:
details = {}
dateTime = cE.xpath('.//*/b/text()')[0]
try:
title = cE.xpath('.//*/h3/a/text()')[0]
except:
title = cE.xpath('.//*/h3/text()')[0]
try:
link = "https://www.leg.mn.gov/" + cE.xpath('.//*/div[@class="float-right text-center mr-2"]/a/@href')[0]
except:
link = cE.xpath('.//*/h3/a/@href')[0]
location_list = cE.xpath('.//*/text()')
if 'Room: ' in location_list:
iN = location_list.index("Room: ")
details['location'] = location_list[iN + 1]
# print(dateTime, title, link, details['location'])
venue, created = Organization.objects.get_or_create(name="MN Legislature", city="St. Paul")
new_event = Event.objects.update_or_create(
event_type = 'Gv',
show_title = title,
show_link = link,
show_date = datetime.strptime(dateTime, DATETIME_FORMAT),
show_day = datetime.strptime(dateTime, DATETIME_FORMAT).date(),
more_details = details['location'],
venue = venue
)
scraper.items+=1
br.close()
scraper.save()
# br.find_element_by_class_name('fc-btn_allCalendars-button').click()

View File

@@ -0,0 +1,99 @@
import re, os, sys
from datetime import datetime
import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()
from events.models import Event, Organization, Scraper
from time import sleep
from pprint import pprint as ppr
from selenium import webdriver as wd
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By
from xvfbwrapper import Xvfb
from lxml import html
import pytz
from events.digitools import getBrowser, createURL, createBasicEvent, getSource
try:
scraper, created = Scraper.objects.get_or_create(
name="Mpls City Council",
website="https://lims.minneapolismn.gov/Calendar/citycouncil/upcoming",
items = 0,
last_ran = datetime.now(),
)
except Exception as e:
print(e)
scraper = Scraper.objects.get(name="Mpls City Council")
print("Scraper: ", scraper)
tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%A, %b %d, %Y %I:%M %p'
calendar_url = 'https://lims.minneapolismn.gov/Calendar/citycouncil/upcoming'
if len(sys.argv) >= 2:
arg1 = sys.argv[1]
br = getBrowser(arg1)
else:
print("No run_env")
quit()
br.get(calendar_url)
sleep(25)
# br.find_element(By.XPATH, '//*/li[@class="tab-header-small"]/a').click()
# sleep(15)
# all_entries = Select(br.find_element(By.XPATH, '//*/select'))
# all_entries.select_by_value('50')
# sleep(15)
ps = html.fromstring(br.page_source)
dayBlocks = ps.xpath('.//*/div[@class="ng-scope"]')
meetings = []
for dB in dayBlocks:
date = dB.xpath('.//div[@class="row"]/div/span[@class="ng-binding"]/text()')[0]
events = dB.xpath('.//div[@class="upcoming ng-scope"]/div')
for event in events:
time = event.xpath('.//div/text()')[0]
title = event.xpath('.//div/a/text()')[0].strip()
if not len(title) > 0:
title = event.xpath('.//div/span/a/text()')[0].strip()
link = event.xpath('.//div/a/@href')[0]
if link.startswith("/Download/"):
link = calendar_url
else:
link = "https://lims.minneapolismn.gov" + link
location = title.split(',')[-1].strip()
mtg_title = title.split(',')[:-1]
if len(mtg_title) > 1:
mtg_title = (' -').join(mtg_title).strip()
else:
mtg_title = mtg_title[0].strip()
dateTime = datetime.strptime(date + " " + time, DATETIME_FORMAT)
if location == "City Hall":
location = "Mpls City Hall"
print(dateTime, location, mtg_title, link)
print('\n\n++++\n\n')
venue, created = Organization.objects.get_or_create(name=location, city="Minneapolis")
new_event = Event.objects.update_or_create(
calendar = 'msp'
event_type = 'Gv',
show_title = mtg_title,
show_link = link,
show_date = dateTime,
show_day = dateTime,
venue = venue
)
scraper.items+=1
br.close()
scraper.save()
# br.find_element_by_class_name('fc-btn_allCalendars-button').click()

View File

@@ -0,0 +1,73 @@
import re, os, sys
from datetime import datetime
import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()
from events.models import Event, Organization, Scraper
from time import sleep
from pprint import pprint as ppr
from selenium import webdriver as wd
from selenium.webdriver.common.by import By
from xvfbwrapper import Xvfb
from lxml import html
import pytz
from events.digitools import getBrowser, createURL, createBasicEvent, getSource
scraper, created = Scraper.objects.get_or_create(
name="St Paul City Council",
website="https://www.stpaul.gov/calendar",
last_ran = datetime.now(),
)
tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%B %d, %Y at %I:%M %p'
calendar_url = 'https://www.stpaul.gov/calendar'
city_site = "https://www.stpaul.gov"
if len(sys.argv) >= 2:
arg1 = sys.argv[1]
br = getBrowser(arg1)
else:
print("No run_env")
quit()
br.get(calendar_url)
sleep(3)
def getEvents(br):
ps = html.fromstring(br.page_source)
eventBlocks = ps.xpath('.//*/div[@class="calendar__item views-row"]')
for eB in eventBlocks:
title = eB.xpath('.//div/h3[@class="field-content calendar__title"]/text()')[0]
link = city_site + eB.xpath('.//div/span[@class="field-content calendar__link"]/a/@href')[0]
dateTime = eB.xpath('.//div[@class="views-field views-field-field-calendar-date-value"]/span/text()')[0]
print(dateTime, title, link)
print('\n\n++++\n\n')
venue, created = Organization.objects.get_or_create(name="Somewhere in St Paul", city="St. Paul")
new_event = Event.objects.update_or_create(
calendar = 'msp'
event_type = 'Gv',
show_title = title,
show_link = link,
show_date = datetime.strptime(dateTime, DATETIME_FORMAT),
show_day = datetime.strptime(dateTime, DATETIME_FORMAT),
venue = venue
)
getEvents(br)
sleep(5)
br.get("https://www.stpaul.gov/calendar?page=1")
getEvents(br)
br.close()
# br.find_element_by_class_name('fc-btn_allCalendars-button').click()

View File

@@ -0,0 +1,116 @@
import requests, os, sys
from icalendar import Calendar as iCalendar, Event
from datetime import datetime
from dateutil import relativedelta
td = relativedelta.relativedelta(hours=5)
from pprint import pprint as ppr
import pytz
import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()
from events.models import Event as DSEvent, Organization, Scraper
td = relativedelta.relativedelta(hours=5)
odt = datetime.now() + td
venue, created = Organization.objects.get_or_create(
name="MN Launch",
city="Minneapolis",
website="https://mn.gov/launchmn/calendar",
)
try:
scraper, created = Scraper.objects.get_or_create(
name=venue.name,
website=venue.website,
items = 0,
last_ran = datetime.now(),
)
except Exception as e:
print(e)
scraper = Scraper.objects.get(name=venue.name)
print("Scraper: ", scraper)
event_type = "Ed"
cal_url = "https://timelyapp.time.ly/api/calendars/54705514/export?format=ics&target=copy&start_date=2024-12-13"
calendar_url = 'https://calendar.google.com/calendar/ical/uvkshlggh1h4ck08emab22btkum9hl94%40import.calendar.google.com/public/basic.ics'
objIcalData = requests.get(cal_url)
gcal = iCalendar.from_ical(objIcalData.text)
cfpa_events = []
tz = pytz.timezone("US/Central")
for component in gcal.walk():
event = {}
event['strSummary'] = f"{(component.get('SUMMARY'))}"
event['strDesc'] = component.get('DESCRIPTION')
event['strLocation'] = component.get('LOCATION')
event['dateStart'] = component.get('DTSTART')
event['dateStamp'] = component.get('DTSTAMP')
if event['dateStamp'] is not None:
event['dateStamp'] = event['dateStart'].dt
if event['dateStart'] is not None:
try:
event['dateStart'] = event['dateStart'].dt
except Exception as e:
event['dateStart'] = event['dateStart'].dt
event['dateEnd'] = (component.get('DTEND'))
if event['dateEnd'] is not None:
event['dateEnd'] = event['dateEnd'].dt
else:
event['dateEnd'] = event['dateStart']
if event['strSummary'] != 'None':
event['details'] = {
"description" : event['strDesc'],
"Location" : event['strLocation'],
}
cfpa_events.append(event)
now_now = datetime.now().astimezone(tz)
try:
if event['dateStart'] > now_now:
print(event['strSummary'])
new_event, created = DSEvent.objects.update_or_create(
calendar = 'msp'
event_type = event_type,
show_title = event['strSummary'],
show_link = venue.website,
show_date = event['dateStart']-td,
show_day = event['dateStart']-td,
more_details = event["details"],
venue = venue
)
scraper.items+=1
if event['strLocation'] != None and event['strLocation'] != 'MN' and event['strLocation'] != 'Online':
loc = event['strLocation'].split('@')
new_venue_name = loc[0]
if len(loc) > 1:
address = loc[1].split(",")
city = address[1].strip()
new_venue, created = Organization.objects.get_or_create(
name=new_venue_name,
city=city,
website="https://mn.gov/launchmn/calendar",
)
new_event.venue = new_venue
new_event.save()
else:
new_event.venue = venue
new_event.save()
except Exception as e:
print(e)
print("Event: ", event['dateStart'], event['strSummary'])
print("Clock: ", now_now)
else:
print("Failed: ", component.get('DESCRIPTION'))
scraper.save()