Files
api.digisnaxx/event_scrapers/Working/govt/MNLeg.py

148 lines
4.7 KiB
Python
Raw Normal View History

2025-10-11 03:50:49 -05:00
# Install Chromedriver and Quarantine
# xattr -d com.apple.quarantine <name-of-executable>
import os, sys
from datetime import datetime
from dateutil import relativedelta
import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()
from time import sleep
from pprint import pprint as ppr
from selenium import webdriver as wd
from xvfbwrapper import Xvfb
from lxml import html
import pytz
from events.models import Event, Organization, Scraper
from events.digitools import getBrowser, createURL, createBasicEvent, getSource
try:
scraper, created = Scraper.objects.get_or_create(
name="MN Legislature",
website="https://www.leg.mn.gov/cal?type=all",
items = 0,
last_ran = datetime.now(),
)
except Exception as e:
print(e)
scraper = Scraper.objects.get(name="MN Legislature")
print("Scraper: ", scraper)
tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%A, %B %d, %Y %I:%M %p'
# Set initial variables for City, etc
calendar_url = 'https://www.leg.mn.gov/cal?type=all'
if len(sys.argv) >= 2:
arg1 = sys.argv[1]
br = getBrowser(arg1)
else:
print("No run_env")
quit()
br.get(calendar_url)
sleep(60)
ps = html.fromstring(br.page_source)
commEvents = ps.xpath('.//*/div[@class="card border-dark comm_item cal_item ml-lg-3"]')
senateEvents = ps.xpath('.//*/div[@class="card border-dark senate_item cal_item ml-lg-3"]')
houseEvents = ps.xpath('.//*/div[@class="card border-dark house_item cal_item ml-lg-3"]')
meetings = []
for hE in houseEvents:
details = {}
dateTime = hE.xpath('.//*/b/text()')[0]
try:
title = hE.xpath('.//*/h3/a/text()')[0]
except:
title = hE.xpath('.//*/h3/text()')[0]
try:
link = "https://www.leg.mn.gov/" + hE.xpath('.//*/div[@class="float-right text-center mr-2 d-print-none"]/a/@href')[0]
except:
link = hE.xpath('.//*/h3/a/@href')[0]
details['location'] = hE.xpath('.//*/div[@class=""]/text()')[0]
# print(dateTime, title, link, details['location'])
venue, created = Organization.objects.get_or_create(name="MN House", city="St. Paul")
new_event, created = Event.objects.update_or_create(
calendar = 'msp'
event_type = 'Gv',
show_title = title,
show_link = link,
show_date = datetime.strptime(dateTime, DATETIME_FORMAT),
show_day = datetime.strptime(dateTime, DATETIME_FORMAT).date(),
more_details = details['location'],
venue = venue
)
scraper.items+=1
for sE in senateEvents:
details = {}
dateTime = sE.xpath('.//*/b/text()')[0]
try:
title = sE.xpath('.//*/h3/a/text()')[0]
except:
title = sE.xpath('.//*/h3/text()')[0]
try:
link = "https://www.leg.mn.gov/" + sE.xpath('.//*/div[@class="float-right text-center mr-2"]/a/@href')[0]
except:
link = sE.xpath('.//*/h3/a/@href')[0]
location_list = sE.xpath('.//*/text()')
if 'Location: ' in location_list:
iN = location_list.index("Location: ")
details['location'] = location_list[iN + 1]
elif 'Senate Floor Session' in location_list:
details['location'] = 'Senate Floor Session'
venue, created = Organization.objects.get_or_create(name="MN Senate", city="St. Paul")
new_event = Event.objects.update_or_create(
event_type = 'Gv',
show_title = title,
show_link = link,
show_date = datetime.strptime(dateTime, DATETIME_FORMAT),
show_day = datetime.strptime(dateTime, DATETIME_FORMAT).date(),
more_details = details['location'],
venue = venue
)
scraper.items+=1
for cE in commEvents:
details = {}
dateTime = cE.xpath('.//*/b/text()')[0]
try:
title = cE.xpath('.//*/h3/a/text()')[0]
except:
title = cE.xpath('.//*/h3/text()')[0]
try:
link = "https://www.leg.mn.gov/" + cE.xpath('.//*/div[@class="float-right text-center mr-2"]/a/@href')[0]
except:
link = cE.xpath('.//*/h3/a/@href')[0]
location_list = cE.xpath('.//*/text()')
if 'Room: ' in location_list:
iN = location_list.index("Room: ")
details['location'] = location_list[iN + 1]
# print(dateTime, title, link, details['location'])
venue, created = Organization.objects.get_or_create(name="MN Legislature", city="St. Paul")
new_event = Event.objects.update_or_create(
event_type = 'Gv',
show_title = title,
show_link = link,
show_date = datetime.strptime(dateTime, DATETIME_FORMAT),
show_day = datetime.strptime(dateTime, DATETIME_FORMAT).date(),
more_details = details['location'],
venue = venue
)
scraper.items+=1
br.close()
scraper.save()
# br.find_element_by_class_name('fc-btn_allCalendars-button').click()