Files
api.digisnaxx/events/scrapers/Working/govt/MplsCityCouncil.py
2025-11-30 15:34:08 -05:00

123 lines
3.8 KiB
Python

import re, os, sys
from datetime import datetime, timedelta
import requests
import json
import django
sys.path.append('../../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'config.django.local'
django.setup()
from events.models import Event, Organization, Scraper, Calendar
from time import sleep
from pprint import pprint as ppr
from selenium import webdriver as wd
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By
from xvfbwrapper import Xvfb
from lxml import html
import pytz
from events.digitools import getBrowser, createURL, createBasicEvent, getSource, add_calendar
import events.digitools as digitools
# Register this scraper, or fetch its existing row.
#
# Only ``name`` identifies the row.  The remaining fields are initial values
# and therefore go in ``defaults``: passing them as lookup arguments (in
# particular ``last_ran=datetime.now()``) can never match an existing row, so
# every run would attempt a fresh INSERT and rely on it failing.
try:
    scraper, created = Scraper.objects.get_or_create(
        name="Mpls City Council",
        defaults={
            'website': "https://lims.minneapolismn.gov/Calendar/citycouncil/upcoming",
            'calendar': Calendar.objects.get(shortcode='msp'),
            'items': 0,
            'new_items': 0,
            'last_ran': datetime.now(),
        },
    )
except Exception as e:
    # Best-effort fallback kept from the original script: report the problem
    # (e.g. the 'msp' calendar lookup failed) and reuse the stored row.
    print(e)
    scraper = Scraper.objects.get(name="Mpls City Council")
print("Scraper: ", scraper)
# strptime pattern used further down to parse each event's 'MeetingTime'
# value, e.g. "2025-11-15T13:30:00" (no fractional seconds, no UTC offset).
DATETIME_FORMAT = '%Y-%m-%dT%H:%M:%S'
# US/Central timezone object.  It is not referenced anywhere else in the
# visible script, so the parsed meeting times remain naive datetimes.
# NOTE(review): presumably meant for localizing 'dateStamp' -- confirm.
tz = pytz.timezone("US/Central")
def build_calendar_url(from_date):
    """Return the LIMS calendar JSON URL listing meetings from *from_date* on.

    Args:
        from_date: a ``date``/``datetime``; only its month, day and year are
            used, rendered as e.g. ``Nov%2015,%202025``.

    Returns:
        The ``GetCalenderList`` endpoint URL (endpoint name spelled as the
        server spells it) requesting up to 100 entries with no end date.
    """
    return (
        'https://lims.minneapolismn.gov/Calendar/GetCalenderList'
        '?fromDate={}%20{},%20{}'
        '&toDate=null&meetingType=0&committeeId=null&pageCount=100'
        '&offsetStart=0&abbreviation=undefined&keywords=&sortOrder=1'
    ).format(from_date.strftime("%b"), from_date.day, from_date.year)


# Start the listing two days in the past.  Month, day and year must all come
# from the same date: mixing "month of two days ago" with "day/year of today"
# produced impossible or wrong dates around month and year boundaries
# (e.g. "Feb 30", or "Dec 30" of the *new* year on January 1st).
td = timedelta(days=2)
odt = datetime.now() - td
month = odt.strftime("%b")
day = odt.day
year = odt.year
cal_url = build_calendar_url(odt)
# print("URL: ", cal_url)
# The run environment is a required first command-line argument; it is passed
# straight to getBrowser(), whose result is used below as the browser handle
# (br.get / br.page_source / br.close).
if len(sys.argv) < 2:
    print("No run_env")
    # sys.exit() rather than quit(): quit() is injected by the ``site`` module
    # for interactive sessions and is not guaranteed to exist in a program.
    # Exit status is unchanged (0).
    sys.exit()
arg1 = sys.argv[1]
br = getBrowser(arg1)
# Load the JSON endpoint in the browser.  The script expects the rendered page
# to carry the raw JSON as the text of a <pre> element inside <body>.
br.get(cal_url)
sleep(2)  # fixed pause before reading page_source (presumably to let the page finish loading)
ps = html.fromstring(br.page_source)
try:
    events = ps.xpath('.//body/pre/text()')[0]
    new_events = json.loads(events)
except (IndexError, ValueError):
    # No <pre> payload (IndexError) or payload is not valid JSON
    # (json.JSONDecodeError is a ValueError).  The same exception still
    # propagates, but the browser is closed first so it is not left open.
    br.close()
    raise
# ppr(dict(new_events[0]))
# Marker looked for in the second comma-separated field of an event's
# 'Location' -> name of the Organization row used as its venue.
# Order matters: markers are tried top to bottom.
KNOWN_VENUES = {
    "City Hall": "Mpls City Hall",
    "Public Service Building": "Mpls Public Service Building",
}


def match_known_venue(location):
    """Return the known venue name matching *location*, or None.

    Only the second comma-separated field of *location* is inspected; a
    location without a comma never matches.
    """
    fields = location.split(",")
    if len(fields) < 2:
        return None
    for marker, venue_name in KNOWN_VENUES.items():
        if marker in fields[1]:
            return venue_name
    return None


# Turn every meeting from the JSON payload into an event dict, resolve its
# venue Organization and hand both to digitools.createBasicEvent().
event_list = []
for event in new_events:
    e = {
        'title': event['CommitteeName'],
        'link': scraper.website,
        'dateStamp': datetime.strptime(event['MeetingTime'], DATETIME_FORMAT),
        'agendaStatus': event['AgendaStatus'],
        'address': event['Address'],
        'description': event['Description'],
        'scraper': scraper,
        'calendars': [scraper.calendar],
    }
    # A null Location is treated as empty text instead of crashing on .split().
    raw_location = event['Location'] or ''

    venue = None
    known_name = match_known_venue(raw_location)
    if known_name is not None:
        try:
            venue = Organization.objects.get(name=known_name)
        except (Organization.DoesNotExist, Organization.MultipleObjectsReturned):
            # Known venue row is missing or ambiguous: use the generic path.
            venue = None
        else:
            e['location'] = known_name

    if venue is None:
        # Generic path: the venue is named after the text before the first
        # "-" of the Location.  This now also covers locations that contain a
        # comma but match no known venue; previously those meetings were
        # silently never created.
        e['location'] = raw_location.split("-")[0].strip()
        try:
            venue, created = Organization.objects.get_or_create(
                name=e['location'],
                city="Minneapolis",
                website=scraper.website,
                is_venue=False,
                address_complete=e['address'],
            )
        except Exception:
            # Creation failed (e.g. a row with this name already exists with
            # other field values): reuse the stored row.
            venue = Organization.objects.get(
                name=e['location'],
                city="Minneapolis",
            )

    digitools.createBasicEvent(e, 'Gv', venue)
    event_list.append(e)
# Release the browser obtained from getBrowser().
# NOTE(review): if `br` is a Selenium WebDriver, close() only closes the
# current window while quit() also ends the driver session -- confirm which
# one is intended here.
br.close()
# Persist the Scraper row.  Nothing in the visible script assigns to
# scraper.items / new_items / last_ran before this point.
# NOTE(review): presumably createBasicEvent() updates the counters through
# e['scraper'] -- verify.
scraper.save()
# Leftover from an earlier, click-driven version of the scraper (disabled):
# br.find_element_by_class_name('fc-btn_allCalendars-button').click()