first push

This commit is contained in:
2025-11-30 16:29:50 -05:00
commit d5055374b4
77 changed files with 9129 additions and 0 deletions

167
Working/govt/MNLeg.py Normal file
View File

@@ -0,0 +1,167 @@
# Install Chromedriver and Quarantine
# xattr -d com.apple.quarantine <name-of-executable>
import os, sys
from datetime import datetime
from dateutil import relativedelta
import django
sys.path.append('../../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'config.django.local'
django.setup()
from time import sleep
from pprint import pprint as ppr
from selenium import webdriver as wd
from xvfbwrapper import Xvfb
from lxml import html
import pytz
from events.models import Event, Organization, Scraper, Calendar
from events.digitools import getBrowser, createURL, createBasicEvent, getSource, add_calendar
try:
scraper, created = Scraper.objects.get_or_create(
name="MN Legislature",
website="https://www.leg.mn.gov/cal?type=all",
calendar = Calendar.objects.get(shortcode='msp'),
items = 0,
new_items = 0,
last_ran = datetime.now(),
)
except Exception as e:
print(e)
scraper = Scraper.objects.get(name="MN Legislature")
print("Scraper: ", scraper)
tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%A, %B %d, %Y %I:%M %p'
# Set initial variables for City, etc
calendar_url = 'https://www.leg.mn.gov/cal?type=all'
if len(sys.argv) >= 2:
arg1 = sys.argv[1]
br = getBrowser(arg1)
else:
print("No run_env")
quit()
br.get(calendar_url)
sleep(10)
ps = html.fromstring(br.page_source)
commEvents = ps.xpath('.//*/div[@class="card border-dark comm_item cal_item ml-lg-3"]')
senateEvents = ps.xpath('.//*/div[@class="card border-dark senate_item cal_item ml-lg-3"]')
houseEvents = ps.xpath('.//*/div[@class="card border-dark house_item cal_item ml-lg-3"]')
meetings = []
for hE in houseEvents:
details = {}
dateTime = hE.xpath('.//*/b/text()')[0]
try:
title = hE.xpath('.//*/h3/a/text()')[0]
except:
title = hE.xpath('.//*/h3/text()')[0]
try:
link = "https://www.leg.mn.gov/" + hE.xpath('.//*/div[@class="float-right text-center mr-2 d-print-none"]/a/@href')[0]
except:
link = hE.xpath('.//*/h3/a/@href')[0]
details['location'] = hE.xpath('.//*/div[@class=""]/text()')[0]
# print(dateTime, title, link, details['location'])
venue, created = Organization.objects.get_or_create(name="MN House", city="St. Paul")
new_event, created = Event.objects.update_or_create(
event_type = 'Gv',
show_title = title,
show_link = link,
show_date = datetime.strptime(dateTime, DATETIME_FORMAT),
show_day = datetime.strptime(dateTime, DATETIME_FORMAT).date(),
more_details = details['location'],
venue = venue,
scraper = scraper
)
if type(new_event) is tuple:
print("STS: ", new_event)
add_calendar(new_event[0], 'msp')
else:
add_calendar(new_event, 'msp')
scraper.items+=1
for sE in senateEvents:
details = {}
dateTime = sE.xpath('.//*/b/text()')[0]
try:
title = sE.xpath('.//*/h3/a/text()')[0]
except:
title = sE.xpath('.//*/h3/text()')[0]
try:
link = "https://www.leg.mn.gov/" + sE.xpath('.//*/div[@class="float-right text-center mr-2"]/a/@href')[0]
except:
link = sE.xpath('.//*/h3/a/@href')[0]
location_list = sE.xpath('.//*/text()')
if 'Location: ' in location_list:
iN = location_list.index("Location: ")
details['location'] = location_list[iN + 1]
elif 'Senate Floor Session' in location_list:
details['location'] = 'Senate Floor Session'
venue, created = Organization.objects.get_or_create(name="MN Senate", city="St. Paul")
new_event = Event.objects.update_or_create(
event_type = 'Gv',
show_title = title,
show_link = link,
show_date = datetime.strptime(dateTime, DATETIME_FORMAT),
show_day = datetime.strptime(dateTime, DATETIME_FORMAT).date(),
more_details = details['location'],
venue = venue,
scraper = scraper
)
if type(new_event) is tuple:
print("STS: ", new_event)
add_calendar(new_event[0], 'msp')
else:
add_calendar(new_event, 'msp')
scraper.items+=1
for cE in commEvents:
details = {}
dateTime = cE.xpath('.//*/b/text()')[0]
try:
title = cE.xpath('.//*/h3/a/text()')[0]
except:
title = cE.xpath('.//*/h3/text()')[0]
try:
link = "https://www.leg.mn.gov/" + cE.xpath('.//*/div[@class="float-right text-center mr-2"]/a/@href')[0]
except:
link = cE.xpath('.//*/h3/a/@href')[0]
location_list = cE.xpath('.//*/text()')
if 'Room: ' in location_list:
iN = location_list.index("Room: ")
details['location'] = location_list[iN + 1]
# print(dateTime, title, link, details['location'])
venue, created = Organization.objects.get_or_create(name="MN Legislature", city="St. Paul")
new_event = Event.objects.update_or_create(
event_type = 'Gv',
show_title = title,
show_link = link,
show_date = datetime.strptime(dateTime, DATETIME_FORMAT),
show_day = datetime.strptime(dateTime, DATETIME_FORMAT).date(),
more_details = details['location'],
venue = venue,
scraper = scraper
)
if type(new_event) is tuple:
print("STS: ", new_event)
add_calendar(new_event[0], 'msp')
else:
add_calendar(new_event, 'msp')
scraper.items+=1
br.close()
scraper.save()
# br.find_element_by_class_name('fc-btn_allCalendars-button').click()

View File

@@ -0,0 +1,122 @@
import re, os, sys
from datetime import datetime, timedelta
import requests
import json
import django
sys.path.append('../../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'config.django.local'
django.setup()
from events.models import Event, Organization, Scraper, Calendar
from time import sleep
from pprint import pprint as ppr
from selenium import webdriver as wd
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By
from xvfbwrapper import Xvfb
from lxml import html
import pytz
from events.digitools import getBrowser, createURL, createBasicEvent, getSource, add_calendar
import events.digitools as digitools
try:
scraper, created = Scraper.objects.get_or_create(
name="Mpls City Council",
website="https://lims.minneapolismn.gov/Calendar/citycouncil/upcoming",
calendar = Calendar.objects.get(shortcode='msp'),
items = 0,
new_items = 0,
last_ran = datetime.now(),
)
except Exception as e:
print(e)
scraper = Scraper.objects.get(name="Mpls City Council")
print("Scraper: ", scraper)
DATETIME_FORMAT = '%Y-%m-%dT%H:%M:%S'
tz = pytz.timezone("US/Central")
td = timedelta(days=2)
odt = datetime.now() - td
month = odt.strftime("%b")
day = int(datetime.now().day)
if day == 1:
day = 30
year = int(datetime.now().year)
cal_url = 'https://lims.minneapolismn.gov/Calendar/GetCalenderList?fromDate={}%20{},%20{}&toDate=null&meetingType=0&committeeId=null&pageCount=100&offsetStart=0&abbreviation=undefined&keywords=&sortOrder=1'.format(month, day, year)
# print("URL: ", cal_url)
# cal_url = 'https://lims.minneapolismn.gov/Calendar/GetCalenderList?fromDate=Nov%2015,%202025&toDate=null&meetingType=0&committeeId=null&pageCount=50&offsetStart=0&abbreviation=undefined&keywords=&sortOrder=1'
if len(sys.argv) >= 2:
arg1 = sys.argv[1]
br = getBrowser(arg1)
else:
print("No run_env")
quit()
br.get(cal_url)
sleep(2)
ps = html.fromstring(br.page_source)
events = ps.xpath('.//body/pre/text()')[0]
new_events = json.loads(events)
# ppr(dict(new_events[0]))
event_list = []
for event in new_events:
e = {}
e['title'] = event['CommitteeName']
e['link'] = scraper.website
e['dateStamp'] = datetime.strptime(event['MeetingTime'], DATETIME_FORMAT)
e['agendaStatus'] = event['AgendaStatus']
e['address'] = event['Address']
e['description'] = event['Description']
e['scraper'] = scraper
e['calendars'] = [scraper.calendar]
try:
# print(event['Location'].split(",")[1])
loc = event['Location'].split(",")[1]
if "City Hall" in loc:
e['location'] = "Mpls City Hall"
venue = Organization.objects.get(
name="Mpls City Hall",
)
digitools.createBasicEvent(e, 'Gv', venue)
elif "Public Service Building" in loc:
e['location'] = "Mpls Public Service Building"
venue = Organization.objects.get(
name="Mpls Public Service Building",
)
digitools.createBasicEvent(e, 'Gv', venue)
except:
# print(event['Location'].split("-")[0])
e['location'] = event['Location'].split("-")[0].strip()
try:
venue, created = Organization.objects.get_or_create(
name=e['location'] ,
city="Minneapolis",
website=scraper.website,
is_venue=False,
address_complete=e['address']
)
except:
venue = Organization.objects.get(
name=e['location'] ,
city="Minneapolis",
)
digitools.createBasicEvent(e, 'Gv', venue)
event_list.append(e)
br.close()
scraper.save()
# br.find_element_by_class_name('fc-btn_allCalendars-button').click()

View File

@@ -0,0 +1,82 @@
import re, os, sys
from datetime import datetime
import django
sys.path.append('../../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'config.django.local'
django.setup()
from events.models import Event, Organization, Scraper, Calendar
from time import sleep
from pprint import pprint as ppr
from selenium import webdriver as wd
from selenium.webdriver.common.by import By
from xvfbwrapper import Xvfb
from lxml import html
import pytz
from events.digitools import getBrowser, createURL, createBasicEvent, getSource, add_calendar
try:
scraper, created = Scraper.objects.get_or_create(
name="St Paul City Council",
website="https://www.stpaul.gov/calendar",
calendar = Calendar.objects.get(shortcode='msp'),
items = 0,
new_items = 0,
last_ran = datetime.now(),
)
except Exception as e:
print(e)
scraper = Scraper.objects.get(name="St Paul City Council")
print("Scraper: ", scraper)
tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%B %d, %Y at %I:%M %p'
calendar_url = 'https://www.stpaul.gov/calendar'
city_site = "https://www.stpaul.gov"
if len(sys.argv) >= 2:
arg1 = sys.argv[1]
br = getBrowser(arg1)
else:
print("No run_env")
quit()
br.get(calendar_url)
sleep(3)
def getEvents(br):
ps = html.fromstring(br.page_source)
eventBlocks = ps.xpath('.//*/div[@class="calendar__item views-row"]')
for eB in eventBlocks:
title = eB.xpath('.//div/h3[@class="field-content calendar__title"]/text()')[0]
link = city_site + eB.xpath('.//div/span[@class="field-content calendar__link"]/a/@href')[0]
dateTime = eB.xpath('.//div[@class="views-field views-field-field-calendar-date-value"]/span/text()')[0]
print(dateTime, title, link)
print('\n\n++++\n\n')
venue, created = Organization.objects.get_or_create(name="Somewhere in St Paul", city="St. Paul")
new_event = Event.objects.update_or_create(
event_type = 'Gv',
show_title = title,
show_link = link,
show_date = datetime.strptime(dateTime, DATETIME_FORMAT),
show_day = datetime.strptime(dateTime, DATETIME_FORMAT),
venue = venue,
scraper = scraper
)
add_calendar(new_event, 'msp')
getEvents(br)
sleep(5)
br.get("https://www.stpaul.gov/calendar?page=1")
getEvents(br)
br.close()
# br.find_element_by_class_name('fc-btn_allCalendars-button').click()

120
Working/govt/mngov.py Normal file
View File

@@ -0,0 +1,120 @@
import requests, os, sys
from icalendar import Calendar as iCalendar, Event
from datetime import datetime
from dateutil import relativedelta
td = relativedelta.relativedelta(hours=5)
from pprint import pprint as ppr
import pytz
import django
sys.path.append('../../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'config.django.local'
django.setup()
from events.models import Event as DSEvent, Organization, Scraper, Calendar
import events.digitools as digitools
td = relativedelta.relativedelta(hours=5)
odt = datetime.now() + td
venue, created = Organization.objects.get_or_create(
name="MN Launch",
city="Minneapolis",
website="https://mn.gov/launchmn/calendar",
)
try:
scraper, created = Scraper.objects.get_or_create(
name=venue.name,
website=venue.website,
calendar = Calendar.objects.get(shortcode='msp'),
items = 0,
new_items = 0,
last_ran = datetime.now(),
)
except Exception as e:
print(e)
scraper = Scraper.objects.get(name=venue.name)
print("Scraper: ", scraper)
event_type = "Ed"
cal_url = "https://timelyapp.time.ly/api/calendars/54705514/export?format=ics&target=copy&start_date=2024-12-13"
calendar_url = 'https://calendar.google.com/calendar/ical/uvkshlggh1h4ck08emab22btkum9hl94%40import.calendar.google.com/public/basic.ics'
objIcalData = requests.get(cal_url)
gcal = iCalendar.from_ical(objIcalData.text)
cfpa_events = []
tz = pytz.timezone("US/Central")
for component in gcal.walk():
event = {}
event['strSummary'] = f"{(component.get('SUMMARY'))}"
event['strDesc'] = component.get('DESCRIPTION')
event['strLocation'] = component.get('LOCATION')
event['dateStart'] = component.get('DTSTART')
event['dateStamp'] = component.get('DTSTAMP')
if event['dateStamp'] is not None:
event['dateStamp'] = event['dateStart'].dt
if event['dateStart'] is not None:
try:
event['dateStart'] = event['dateStart'].dt
except Exception as e:
event['dateStart'] = event['dateStart'].dt
event['dateEnd'] = (component.get('DTEND'))
if event['dateEnd'] is not None:
event['dateEnd'] = event['dateEnd'].dt
else:
event['dateEnd'] = event['dateStart']
if event['strSummary'] != 'None':
event['details'] = {
"description" : event['strDesc'],
"Location" : event['strLocation'],
}
cfpa_events.append(event)
now_now = datetime.now().astimezone(tz)
try:
if event['dateStart'] > now_now:
print(event['strSummary'])
new_event, created = DSEvent.objects.update_or_create(
event_type = event_type,
show_title = event['strSummary'],
show_link = venue.website,
show_date = event['dateStart']-td,
show_day = event['dateStart']-td,
more_details = event["details"],
venue = venue
)
digitools.add_calendar(new_event, 'msp')
scraper.items+=1
if event['strLocation'] != None and event['strLocation'] != 'MN' and event['strLocation'] != 'Online':
loc = event['strLocation'].split('@')
new_venue_name = loc[0]
if len(loc) > 1:
address = loc[1].split(",")
city = address[1].strip()
new_venue, created = Organization.objects.get_or_create(
name=new_venue_name,
city=city,
website="https://mn.gov/launchmn/calendar",
)
new_event.venue = new_venue
new_event.save()
else:
new_event.venue = venue
new_event.save()
except Exception as e:
print(e)
print("Event: ", event['dateStart'], event['strSummary'])
print("Clock: ", now_now)
else:
print("Failed: ", component.get('DESCRIPTION'))
scraper.save()