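"""Scraper for upcoming Minneapolis City Council meetings.

Fetches the meeting list from the LIMS GetCalenderList endpoint and hands
each meeting to events.digitools.createBasicEvent, attaching it to the
'msp' calendar. Expects the run environment name as the first command-line
argument (it is passed to digitools.getBrowser).
"""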
import json
import os
import re
import sys

from datetime import datetime, timedelta
from time import sleep
from pprint import pprint as ppr

import requests
import pytz
from lxml import html
from selenium import webdriver as wd
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By
from xvfbwrapper import Xvfb

# Make the scrapers project importable and initialise its environment
# before the Django models below are imported.
sys.path.append('/var/www/digisnaxx.ado/scrapers')
import dtss
dtss.getReady()

from events.models import Event, Organization, Scraper, Calendar
from events.digitools import getBrowser, createURL, createBasicEvent, getSource, add_calendar
import events.digitools as digitools

# Register this scraper, falling back to the existing record if creation fails.
try:
    scraper, created = Scraper.objects.get_or_create(
        name="Mpls City Council",
        website="https://lims.minneapolismn.gov/Calendar/citycouncil/upcoming",
        calendar=Calendar.objects.get(shortcode='msp'),
        items=0,
        new_items=0,
        last_ran=datetime.now(),
    )
except Exception as e:
    print(e)
    scraper = Scraper.objects.get(name="Mpls City Council")

print("Scraper: ", scraper)

DATETIME_FORMAT = '%Y-%m-%dT%H:%M:%S'
tz = pytz.timezone("US/Central")

# Start the query window two days in the past (toDate=null leaves the end
# open). Month, day and year are all taken from the same offset timestamp
# so the date stays consistent across month boundaries.
td = timedelta(days=2)
odt = datetime.now() - td

month = odt.strftime("%b")
day = odt.day
year = odt.year

cal_url = 'https://lims.minneapolismn.gov/Calendar/GetCalenderList?fromDate={}%20{},%20{}&toDate=null&meetingType=0&committeeId=null&pageCount=100&offsetStart=0&abbreviation=undefined&keywords=&sortOrder=1'.format(month, day, year)

# print("URL: ", cal_url)

# Example of a fully expanded request:
# cal_url = 'https://lims.minneapolismn.gov/Calendar/GetCalenderList?fromDate=Nov%2015,%202025&toDate=null&meetingType=0&committeeId=null&pageCount=50&offsetStart=0&abbreviation=undefined&keywords=&sortOrder=1'

# The run environment is passed as the first command-line argument and
# handed to digitools.getBrowser().
if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = getBrowser(arg1)
else:
    print("No run_env argument supplied")
    sys.exit(1)

# The endpoint returns JSON; the browser renders it inside a <pre>
# element, so pull that text out and parse it.
br.get(cal_url)
sleep(2)

ps = html.fromstring(br.page_source)
events = ps.xpath('.//body/pre/text()')[0]
new_events = json.loads(events)
# ppr(dict(new_events[0]))

event_list = []

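# Fields consumed from each meeting record below: CommitteeName,
# MeetingTime, AgendaStatus, Address, Description and Location.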
for event in new_events:
    # Map the LIMS meeting record onto the dict shape expected by
    # digitools.createBasicEvent.
    e = {}
    e['title'] = event['CommitteeName']
    e['link'] = scraper.website
    e['dateStamp'] = datetime.strptime(event['MeetingTime'], DATETIME_FORMAT)
    e['agendaStatus'] = event['AgendaStatus']
    e['address'] = event['Address']
    e['description'] = event['Description']
    e['scraper'] = scraper
    e['calendars'] = [scraper.calendar]

    try:
        # Locations of the form "<room>, <building>" map onto one of the
        # two known venue Organizations.
        # print(event['Location'].split(",")[1])
        loc = event['Location'].split(",")[1]
        if "City Hall" in loc:
            e['location'] = "Mpls City Hall"
            venue = Organization.objects.get(
                name="Mpls City Hall",
            )
            digitools.createBasicEvent(e, 'Gv', venue)
        elif "Public Service Building" in loc:
            e['location'] = "Mpls Public Service Building"
            venue = Organization.objects.get(
                name="Mpls Public Service Building",
            )
            digitools.createBasicEvent(e, 'Gv', venue)
    except Exception:
        # Fallback for locations without that pattern (or a missing venue
        # record): take the part before any dash and create an
        # Organization for it on first sight.
        # print(event['Location'].split("-")[0])
        e['location'] = event['Location'].split("-")[0].strip()
        try:
            venue, created = Organization.objects.get_or_create(
                name=e['location'],
                city="Minneapolis",
                website=scraper.website,
                is_venue=False,
                address_complete=e['address'],
            )
        except Exception:
            venue = Organization.objects.get(
                name=e['location'],
                city="Minneapolis",
            )
        digitools.createBasicEvent(e, 'Gv', venue)

    event_list.append(e)

br.close()

scraper.save()
# br.find_element_by_class_name('fc-btn_allCalendars-button').click()