Files
scrapers/Working/govt/mn.legislature.py

167 lines
5.3 KiB
Python
Raw Normal View History

2025-11-30 16:29:50 -05:00
# Install Chromedriver and Quarantine
# xattr -d com.apple.quarantine <name-of-executable>
import os, sys
from datetime import datetime
from dateutil import relativedelta
2026-01-09 22:49:19 -05:00
sys.path.append('/var/www/digisnaxx.ado/scrapers')
import dtss
dtss.getReady()
2025-11-30 16:29:50 -05:00
from time import sleep
from pprint import pprint as ppr
from selenium import webdriver as wd
from xvfbwrapper import Xvfb
from lxml import html
import pytz
from events.models import Event, Organization, Scraper, Calendar
from events.digitools import getBrowser, createURL, createBasicEvent, getSource, add_calendar
# Look up (or create) the Scraper row that tracks runs of this script.
#
# Only the stable identity (the name) is used as the lookup key.  Volatile
# values are passed through `defaults`, which get_or_create() applies only
# when it has to create the row.  Using `last_ran=datetime.now()` as a lookup
# key would never match an existing row, forcing a create attempt on every
# run.
try:
    scraper, created = Scraper.objects.get_or_create(
        name="MN Legislature",
        defaults={
            "website": "https://www.leg.mn.gov/cal?type=all",
            "calendar": Calendar.objects.get(shortcode='msp'),
            "items": 0,
            "new_items": 0,
            "last_ran": datetime.now(),
        },
    )
except Exception as e:
    # Best-effort fallback: report the problem and reuse the row that is
    # already stored under this name.
    print(e)
    scraper = Scraper.objects.get(name="MN Legislature")
print("Scraper: ", scraper)
# Module-level settings.  DATETIME_FORMAT is the strptime layout used for the
# timestamp text read from each calendar card; tz is defined here but is not
# referenced by the rest of the visible script.
tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%A, %B %d, %Y %I:%M %p'

# Set initial variables for City, etc
calendar_url = 'https://www.leg.mn.gov/cal?type=all'

# The first command-line argument is handed to getBrowser(); without it the
# script reports the problem and stops before any browser is started.
if len(sys.argv) < 2:
    print("No run_env")
    quit()

arg1 = sys.argv[1]
br = getBrowser(arg1)
# Open the calendar page and wait ten seconds before reading the page source
# (presumably so client-side rendering can finish -- confirm if tuning this).
br.get(calendar_url)
sleep(10)

# Parse the page source once; every card list below comes from this tree.
ps = html.fromstring(br.page_source)

# Cards are selected by their exact class attribute; only the *_item token
# differs between House, Senate and committee entries.
houseEvents = ps.xpath('.//*/div[@class="card border-dark house_item cal_item ml-lg-3"]')
senateEvents = ps.xpath('.//*/div[@class="card border-dark senate_item cal_item ml-lg-3"]')
commEvents = ps.xpath('.//*/div[@class="card border-dark comm_item cal_item ml-lg-3"]')

meetings = []
# Store one Event per House calendar card and attach it to the 'msp' calendar.
for hE in houseEvents:
    details = {}
    dateTime = hE.xpath('.//*/b/text()')[0]

    # The title is read from a link inside the <h3> when there is one,
    # otherwise from the <h3> text itself.
    titles = hE.xpath('.//*/h3/a/text()') or hE.xpath('.//*/h3/text()')
    title = titles[0]

    # Prefer the link in the right-floated button box (site-relative, so it is
    # prefixed with the site root); otherwise use the <h3> link as-is.
    hrefs = hE.xpath('.//*/div[@class="float-right text-center mr-2 d-print-none"]/a/@href')
    if hrefs:
        link = "https://www.leg.mn.gov/" + hrefs[0]
    else:
        link = hE.xpath('.//*/h3/a/@href')[0]

    # A card without the location <div> used to abort the whole run with an
    # IndexError; store an empty location instead.
    locations = hE.xpath('.//*/div[@class=""]/text()')
    details['location'] = locations[0] if locations else ''
    # print(dateTime, title, link, details['location'])

    venue, created = Organization.objects.get_or_create(name="MN House", city="St. Paul")

    # Parse the timestamp once and derive the day from it.
    show_date = datetime.strptime(dateTime, DATETIME_FORMAT)

    # Every field is a lookup argument (no `defaults`), so an existing row is
    # reused only when all of these values match; otherwise a row is created.
    new_event, created = Event.objects.update_or_create(
        event_type='Gv',
        show_title=title,
        show_link=link,
        show_date=show_date,
        show_day=show_date.date(),
        more_details=details['location'],
        venue=venue,
        scraper=scraper,
    )
    # The (object, created) pair is unpacked above, so new_event is always the
    # Event itself and no tuple check is needed.
    add_calendar(new_event, 'msp')
    scraper.items += 1
# Store one Event per Senate calendar card and attach it to the 'msp' calendar.
for sE in senateEvents:
    details = {}
    dateTime = sE.xpath('.//*/b/text()')[0]

    # The title is read from a link inside the <h3> when there is one,
    # otherwise from the <h3> text itself.
    titles = sE.xpath('.//*/h3/a/text()') or sE.xpath('.//*/h3/text()')
    title = titles[0]

    # Prefer the link in the right-floated button box (site-relative, so it is
    # prefixed with the site root); otherwise use the <h3> link as-is.
    hrefs = sE.xpath('.//*/div[@class="float-right text-center mr-2"]/a/@href')
    if hrefs:
        link = "https://www.leg.mn.gov/" + hrefs[0]
    else:
        link = sE.xpath('.//*/h3/a/@href')[0]

    # The location is the text node that follows the "Location: " label; floor
    # sessions carry no label and are recognised by their heading text.
    location_list = sE.xpath('.//*/text()')
    if 'Location: ' in location_list:
        iN = location_list.index("Location: ")
        details['location'] = location_list[iN + 1]
    elif 'Senate Floor Session' in location_list:
        details['location'] = 'Senate Floor Session'
    else:
        # Neither marker present: previously a KeyError below aborted the run.
        details['location'] = ''

    venue, created = Organization.objects.get_or_create(name="MN Senate", city="St. Paul")

    # Parse the timestamp once and derive the day from it.
    show_date = datetime.strptime(dateTime, DATETIME_FORMAT)

    # Every field is a lookup argument (no `defaults`), so an existing row is
    # reused only when all of these values match; otherwise a row is created.
    result = Event.objects.update_or_create(
        event_type='Gv',
        show_title=title,
        show_link=link,
        show_date=show_date,
        show_day=show_date.date(),
        more_details=details['location'],
        venue=venue,
        scraper=scraper,
    )
    # update_or_create() always returns an (object, created) pair.  The pair is
    # still printed so the run log keeps its existing "STS: " lines.
    print("STS: ", result)
    new_event, created = result
    add_calendar(new_event, 'msp')
    scraper.items += 1
# Store one Event per committee calendar card and attach it to the 'msp'
# calendar.
for cE in commEvents:
    details = {}
    dateTime = cE.xpath('.//*/b/text()')[0]

    # The title is read from a link inside the <h3> when there is one,
    # otherwise from the <h3> text itself.
    titles = cE.xpath('.//*/h3/a/text()') or cE.xpath('.//*/h3/text()')
    title = titles[0]

    # Prefer the link in the right-floated button box (site-relative, so it is
    # prefixed with the site root); otherwise use the <h3> link as-is.
    hrefs = cE.xpath('.//*/div[@class="float-right text-center mr-2"]/a/@href')
    if hrefs:
        link = "https://www.leg.mn.gov/" + hrefs[0]
    else:
        link = cE.xpath('.//*/h3/a/@href')[0]

    # The room is the text node that follows the "Room: " label.
    location_list = cE.xpath('.//*/text()')
    if 'Room: ' in location_list:
        iN = location_list.index("Room: ")
        details['location'] = location_list[iN + 1]
    else:
        # No room listed: previously a KeyError below aborted the run.
        details['location'] = ''
    # print(dateTime, title, link, details['location'])

    venue, created = Organization.objects.get_or_create(name="MN Legislature", city="St. Paul")

    # Parse the timestamp once and derive the day from it.
    show_date = datetime.strptime(dateTime, DATETIME_FORMAT)

    # Every field is a lookup argument (no `defaults`), so an existing row is
    # reused only when all of these values match; otherwise a row is created.
    result = Event.objects.update_or_create(
        event_type='Gv',
        show_title=title,
        show_link=link,
        show_date=show_date,
        show_day=show_date.date(),
        more_details=details['location'],
        venue=venue,
        scraper=scraper,
    )
    # update_or_create() always returns an (object, created) pair.  The pair is
    # still printed so the run log keeps its existing "STS: " lines.
    print("STS: ", result)
    new_event, created = result
    add_calendar(new_event, 'msp')
    scraper.items += 1
# End the WebDriver session.  close() only closes the current window, whereas
# quit() also shuts down the driver, so no driver process is left running
# after the script exits.
br.quit()

# Persist the item counter that was incremented for every card processed.
scraper.save()
# br.find_element_by_class_name('fc-btn_allCalendars-button').click()