Files
api.digisnaxx/events/scrapers/Working/govt/MplsCityCouncil.py

123 lines
3.8 KiB
Python
Raw Normal View History

2025-10-11 03:50:49 -05:00
import re, os, sys
2025-11-30 15:34:08 -05:00
from datetime import datetime, timedelta
import requests
import json
2025-10-11 03:50:49 -05:00
import django
2025-11-30 15:34:08 -05:00
sys.path.append('../../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'config.django.local'
2025-10-11 03:50:49 -05:00
django.setup()
2025-11-30 15:34:08 -05:00
from events.models import Event, Organization, Scraper, Calendar
2025-10-11 03:50:49 -05:00
from time import sleep
from pprint import pprint as ppr
from selenium import webdriver as wd
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By
from xvfbwrapper import Xvfb
from lxml import html
import pytz
2025-11-30 15:34:08 -05:00
from events.digitools import getBrowser, createURL, createBasicEvent, getSource, add_calendar
import events.digitools as digitools
2025-10-11 03:50:49 -05:00
# Register this scraper, or reuse the row left by an earlier run.
#
# Only `name` is used to find an existing row.  Everything else goes in
# `defaults`, which Django applies only when it has to create the row.  The
# previous call passed `last_ran=datetime.now()` (plus the counters) as lookup
# fields, so the lookup could never match and every run attempted an insert.
try:
    scraper, created = Scraper.objects.get_or_create(
        name="Mpls City Council",
        defaults={
            "website": "https://lims.minneapolismn.gov/Calendar/citycouncil/upcoming",
            "calendar": Calendar.objects.get(shortcode='msp'),
            "items": 0,
            "new_items": 0,
            "last_ran": datetime.now(),
        },
    )
except Exception as e:
    # Best-effort fallback kept from the original script: report the problem
    # and try a plain lookup by name (this re-raises if no such row exists).
    print(e)
    scraper = Scraper.objects.get(name="Mpls City Council")
print("Scraper: ", scraper)
2025-11-30 15:34:08 -05:00
# strptime() pattern used to parse each event's `MeetingTime` string.
DATETIME_FORMAT = '%Y-%m-%dT%H:%M:%S'
2025-10-11 03:50:49 -05:00
# pytz timezone object for US/Central.
# NOTE(review): nothing in the visible part of this script reads `tz` - confirm it is still needed.
tz = pytz.timezone("US/Central")
2025-11-30 15:34:08 -05:00
# Query template for the calendar JSON endpoint; the three placeholders are the
# abbreviated month name, the day of month and the four-digit year of `fromDate`.
CAL_URL_TEMPLATE = (
    'https://lims.minneapolismn.gov/Calendar/GetCalenderList'
    '?fromDate={}%20{},%20{}&toDate=null&meetingType=0&committeeId=null'
    '&pageCount=100&offsetStart=0&abbreviation=undefined&keywords=&sortOrder=1'
)


def build_cal_url(from_date):
    """Return the calendar query URL whose `fromDate` is *from_date*.

    Month, day and year are all read from the same datetime so the resulting
    date is always a real calendar date.

    Args:
        from_date: datetime (or date) marking the start of the query window.

    Returns:
        The fully formatted URL string.
    """
    return CAL_URL_TEMPLATE.format(
        from_date.strftime("%b"), from_date.day, from_date.year
    )


# Start the query window two days in the past.
td = timedelta(days=2)
odt = datetime.now() - td

# Previously the month came from `odt` while day and year came from "today"
# (with day 1 rewritten to 30).  That produced dates such as "Feb 30" on
# 1 March or "Dec 30" of the *new* year on 1 January.  All three parts now
# come from `odt`.
month = odt.strftime("%b")
day = odt.day
year = odt.year

cal_url = build_cal_url(odt)
2025-10-11 03:50:49 -05:00
2025-11-30 15:34:08 -05:00
# print("URL: ", cal_url)
# cal_url = 'https://lims.minneapolismn.gov/Calendar/GetCalenderList?fromDate=Nov%2015,%202025&toDate=null&meetingType=0&committeeId=null&pageCount=50&offsetStart=0&abbreviation=undefined&keywords=&sortOrder=1'
2025-10-11 03:50:49 -05:00
# The first command-line argument is handed to getBrowser(); without it the
# script reports the problem and stops before any browser is started.
if len(sys.argv) < 2:
    print("No run_env")
    quit()

arg1 = sys.argv[1]
br = getBrowser(arg1)
2025-11-30 15:34:08 -05:00
# Phrases looked for in the second comma-separated part of an event's
# `Location`, paired with the name of the Organization row used for it.
# Checked in order; the first match wins.
_KNOWN_VENUES = (
    ("City Hall", "Mpls City Hall"),
    ("Public Service Building", "Mpls Public Service Building"),
)


def _known_venue_name(location):
    """Return the Organization name for a recognised building, else None.

    Only the text between the first and second comma of *location* is
    inspected; a location without a comma is never recognised.
    """
    parts = location.split(",")
    if len(parts) < 2:
        return None
    for phrase, venue_name in _KNOWN_VENUES:
        if phrase in parts[1]:
            return venue_name
    return None


def _resolve_venue(event_data, location):
    """Pick the Organization for an event and record its name on the event.

    Sets ``event_data['location']`` and returns the Organization instance.
    A recognised building uses its existing Organization row.  Anything else
    (including a recognised building whose row cannot be fetched) uses an
    Organization named after the text before the first "-" in *location*,
    created on demand.
    """
    venue_name = _known_venue_name(location)
    if venue_name is not None:
        try:
            venue = Organization.objects.get(name=venue_name)
        except (Organization.DoesNotExist, Organization.MultipleObjectsReturned):
            # Fall through to the generic venue below.
            pass
        else:
            event_data['location'] = venue_name
            return venue

    event_data['location'] = location.split("-")[0].strip()
    try:
        venue, _created = Organization.objects.get_or_create(
            name=event_data['location'],
            city="Minneapolis",
            website=scraper.website,
            is_venue=False,
            address_complete=event_data['address'],
        )
    except Exception:
        # Best-effort fallback kept from the original script: if the
        # get-or-create fails, look the venue up by name and city only.
        venue = Organization.objects.get(
            name=event_data['location'],
            city="Minneapolis",
        )
    return venue


# Fetch the calendar JSON through the browser and store one event per entry.
# The browser is closed in `finally` so a failed fetch, parse or import does
# not leave a browser process behind.
try:
    br.get(cal_url)
    sleep(2)

    # The endpoint returns JSON, which the browser renders inside <body><pre>.
    ps = html.fromstring(br.page_source)
    events = ps.xpath('.//body/pre/text()')[0]
    new_events = json.loads(events)
    event_list = []

    for event in new_events:
        e = {
            'title': event['CommitteeName'],
            'link': scraper.website,
            'dateStamp': datetime.strptime(event['MeetingTime'], DATETIME_FORMAT),
            'agendaStatus': event['AgendaStatus'],
            'address': event['Address'],
            'description': event['Description'],
            'scraper': scraper,
            'calendars': [scraper.calendar],
        }
        venue = _resolve_venue(e, event['Location'])
        # Called exactly once per event, outside any exception handler, so a
        # failure here is reported instead of triggering a second attempt
        # under a different venue.
        digitools.createBasicEvent(e, 'Gv', venue)
        event_list.append(e)
finally:
    br.close()

scraper.save()
# br.find_element_by_class_name('fc-btn_allCalendars-button').click()