lots of updates :/

This commit is contained in:
2025-11-30 15:34:08 -05:00
parent 61e198ff55
commit e06a5d49f8
84 changed files with 6614 additions and 2297 deletions

View File

@@ -6,8 +6,8 @@ from datetime import datetime
from dateutil import relativedelta
import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
sys.path.append('../../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'config.django.local'
django.setup()
from time import sleep
@@ -18,14 +18,16 @@ from xvfbwrapper import Xvfb
from lxml import html
import pytz
from events.models import Event, Organization, Scraper
from events.digitools import getBrowser, createURL, createBasicEvent, getSource
from events.models import Event, Organization, Scraper, Calendar
from events.digitools import getBrowser, createURL, createBasicEvent, getSource, add_calendar
try:
scraper, created = Scraper.objects.get_or_create(
name="MN Legislature",
website="https://www.leg.mn.gov/cal?type=all",
calendar = Calendar.objects.get(shortcode='msp'),
items = 0,
new_items = 0,
last_ran = datetime.now(),
)
except Exception as e:
@@ -48,7 +50,7 @@ else:
quit()
br.get(calendar_url)
sleep(60)
sleep(10)
ps = html.fromstring(br.page_source)
commEvents = ps.xpath('.//*/div[@class="card border-dark comm_item cal_item ml-lg-3"]')
@@ -71,15 +73,21 @@ for hE in houseEvents:
# print(dateTime, title, link, details['location'])
venue, created = Organization.objects.get_or_create(name="MN House", city="St. Paul")
new_event, created = Event.objects.update_or_create(
calendar = 'msp'
event_type = 'Gv',
show_title = title,
show_link = link,
show_date = datetime.strptime(dateTime, DATETIME_FORMAT),
show_day = datetime.strptime(dateTime, DATETIME_FORMAT).date(),
more_details = details['location'],
venue = venue
venue = venue,
scraper = scraper
)
# update_or_create yields an (object, created) pair; if that pair reaches this
# point un-unpacked, attach the calendar to the Event object inside it.
# isinstance is the idiomatic type check and also accepts tuple subclasses.
if isinstance(new_event, tuple):
    print("STS: ", new_event)
    add_calendar(new_event[0], 'msp')
else:
    add_calendar(new_event, 'msp')
scraper.items+=1
@@ -108,8 +116,14 @@ for sE in senateEvents:
show_date = datetime.strptime(dateTime, DATETIME_FORMAT),
show_day = datetime.strptime(dateTime, DATETIME_FORMAT).date(),
more_details = details['location'],
venue = venue
venue = venue,
scraper = scraper
)
# new_event may still be the raw (object, created) pair returned by
# update_or_create (the assignment is outside this view -- confirm); in that
# case attach the calendar to the Event object inside it.
# isinstance is the idiomatic type check and also accepts tuple subclasses.
if isinstance(new_event, tuple):
    print("STS: ", new_event)
    add_calendar(new_event[0], 'msp')
else:
    add_calendar(new_event, 'msp')
scraper.items+=1
for cE in commEvents:
@@ -136,8 +150,14 @@ for cE in commEvents:
show_date = datetime.strptime(dateTime, DATETIME_FORMAT),
show_day = datetime.strptime(dateTime, DATETIME_FORMAT).date(),
more_details = details['location'],
venue = venue
venue = venue,
scraper = scraper
)
# new_event may still be the raw (object, created) pair returned by
# update_or_create (the assignment is outside this view -- confirm); in that
# case attach the calendar to the Event object inside it.
# isinstance is the idiomatic type check and also accepts tuple subclasses.
if isinstance(new_event, tuple):
    print("STS: ", new_event)
    add_calendar(new_event[0], 'msp')
else:
    add_calendar(new_event, 'msp')
scraper.items+=1

View File

@@ -1,12 +1,14 @@
import re, os, sys
from datetime import datetime
from datetime import datetime, timedelta
import requests
import json
import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
sys.path.append('../../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'config.django.local'
django.setup()
from events.models import Event, Organization, Scraper
from events.models import Event, Organization, Scraper, Calendar
from time import sleep
from pprint import pprint as ppr
@@ -17,13 +19,17 @@ from selenium.webdriver.common.by import By
from xvfbwrapper import Xvfb
from lxml import html
import pytz
from events.digitools import getBrowser, createURL, createBasicEvent, getSource
from events.digitools import getBrowser, createURL, createBasicEvent, getSource, add_calendar
import events.digitools as digitools
try:
scraper, created = Scraper.objects.get_or_create(
name="Mpls City Council",
website="https://lims.minneapolismn.gov/Calendar/citycouncil/upcoming",
calendar = Calendar.objects.get(shortcode='msp'),
items = 0,
new_items = 0,
last_ran = datetime.now(),
)
except Exception as e:
@@ -31,11 +37,24 @@ except Exception as e:
scraper = Scraper.objects.get(name="Mpls City Council")
print("Scraper: ", scraper)
DATETIME_FORMAT = '%Y-%m-%dT%H:%M:%S'
tz = pytz.timezone("US/Central")
td = timedelta(days=2)
odt = datetime.now() - td
DATETIME_FORMAT = '%A, %b %d, %Y %I:%M %p'
# Build the `fromDate` query parameter for the LIMS calendar list endpoint.
# Month, day and year are all taken from the same timestamp (`odt`, two days
# before now) so the result is always a real calendar date. Taking the month
# from `odt` but the day/year from today produced impossible or wrong dates
# around month boundaries (e.g. "Feb 30" on March 1st, or a December date
# stamped with the new year on January 1st).
month = odt.strftime("%b")
day = odt.day
year = odt.year
calendar_url = 'https://lims.minneapolismn.gov/Calendar/citycouncil/upcoming'
cal_url = 'https://lims.minneapolismn.gov/Calendar/GetCalenderList?fromDate={}%20{},%20{}&toDate=null&meetingType=0&committeeId=null&pageCount=100&offsetStart=0&abbreviation=undefined&keywords=&sortOrder=1'.format(month, day, year)
# print("URL: ", cal_url)
# cal_url = 'https://lims.minneapolismn.gov/Calendar/GetCalenderList?fromDate=Nov%2015,%202025&toDate=null&meetingType=0&committeeId=null&pageCount=50&offsetStart=0&abbreviation=undefined&keywords=&sortOrder=1'
if len(sys.argv) >= 2:
arg1 = sys.argv[1]
@@ -44,56 +63,60 @@ else:
print("No run_env")
quit()
br.get(calendar_url)
sleep(25)
# br.find_element(By.XPATH, '//*/li[@class="tab-header-small"]/a').click()
# sleep(15)
# all_entries = Select(br.find_element(By.XPATH, '//*/select'))
# all_entries.select_by_value('50')
# sleep(15)
br.get(cal_url)
sleep(2)
ps = html.fromstring(br.page_source)
events = ps.xpath('.//body/pre/text()')[0]
new_events = json.loads(events)
# ppr(dict(new_events[0]))
event_list = []
dayBlocks = ps.xpath('.//*/div[@class="ng-scope"]')
meetings = []
for dB in dayBlocks:
date = dB.xpath('.//div[@class="row"]/div/span[@class="ng-binding"]/text()')[0]
events = dB.xpath('.//div[@class="upcoming ng-scope"]/div')
for event in events:
time = event.xpath('.//div/text()')[0]
title = event.xpath('.//div/a/text()')[0].strip()
if not len(title) > 0:
title = event.xpath('.//div/span/a/text()')[0].strip()
link = event.xpath('.//div/a/@href')[0]
if link.startswith("/Download/"):
link = calendar_url
else:
link = "https://lims.minneapolismn.gov" + link
location = title.split(',')[-1].strip()
mtg_title = title.split(',')[:-1]
if len(mtg_title) > 1:
mtg_title = (' -').join(mtg_title).strip()
else:
mtg_title = mtg_title[0].strip()
dateTime = datetime.strptime(date + " " + time, DATETIME_FORMAT)
if location == "City Hall":
location = "Mpls City Hall"
print(dateTime, location, mtg_title, link)
print('\n\n++++\n\n')
venue, created = Organization.objects.get_or_create(name=location, city="Minneapolis")
new_event = Event.objects.update_or_create(
calendar = 'msp'
event_type = 'Gv',
show_title = mtg_title,
show_link = link,
show_date = dateTime,
show_day = dateTime,
venue = venue
)
scraper.items+=1
# Turn each record decoded from the JSON response (new_events) into an event
# dict and pass it, with a venue Organization, to digitools.createBasicEvent.
# NOTE(review): indentation is not visible in this view, so the exact nesting
# of the statements below (loop body / try / except) must be confirmed
# against the real file before relying on these notes.
for event in new_events:
e = {}
e['title'] = event['CommitteeName']
# Every event links to the scraper's website rather than a per-meeting page.
e['link'] = scraper.website
# MeetingTime is parsed with the module-level DATETIME_FORMAT.
e['dateStamp'] = datetime.strptime(event['MeetingTime'], DATETIME_FORMAT)
e['agendaStatus'] = event['AgendaStatus']
e['address'] = event['Address']
e['description'] = event['Description']
e['scraper'] = scraper
e['calendars'] = [scraper.calendar]
# Venue selection: the text after the first comma in Location is checked for
# two known building names.
# NOTE(review): if Location contains a comma but names neither building, no
# branch in this try creates an event -- confirm that skipping is intended.
try:
# print(event['Location'].split(",")[1])
# Raises IndexError when Location has no comma, which lands in the bare
# except below.
loc = event['Location'].split(",")[1]
if "City Hall" in loc:
e['location'] = "Mpls City Hall"
# .get() requires the Organization to exist already; a failure here is also
# caught by the bare except below.
venue = Organization.objects.get(
name="Mpls City Hall",
)
digitools.createBasicEvent(e, 'Gv', venue)
elif "Public Service Building" in loc:
e['location'] = "Mpls Public Service Building"
venue = Organization.objects.get(
name="Mpls Public Service Building",
)
digitools.createBasicEvent(e, 'Gv', venue)
# NOTE(review): bare except -- catches every exception raised in the try body
# (including errors from createBasicEvent), not only the missing-comma case.
except:
# print(event['Location'].split("-")[0])
# Fallback location: the text before the first "-" in Location.
e['location'] = event['Location'].split("-")[0].strip()
try:
venue, created = Organization.objects.get_or_create(
name=e['location'] ,
city="Minneapolis",
website=scraper.website,
is_venue=False,
address_complete=e['address']
)
# NOTE(review): second bare except; if get_or_create fails for any reason the
# venue is looked up by name and city only.
except:
venue = Organization.objects.get(
name=e['location'] ,
city="Minneapolis",
)
digitools.createBasicEvent(e, 'Gv', venue)
event_list.append(e)
br.close()
scraper.save()
# br.find_element_by_class_name('fc-btn_allCalendars-button').click()

View File

@@ -2,11 +2,11 @@ import re, os, sys
from datetime import datetime
import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
sys.path.append('../../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'config.django.local'
django.setup()
from events.models import Event, Organization, Scraper
from events.models import Event, Organization, Scraper, Calendar
from time import sleep
from pprint import pprint as ppr
@@ -16,13 +16,21 @@ from selenium.webdriver.common.by import By
from xvfbwrapper import Xvfb
from lxml import html
import pytz
from events.digitools import getBrowser, createURL, createBasicEvent, getSource
from events.digitools import getBrowser, createURL, createBasicEvent, getSource, add_calendar
scraper, created = Scraper.objects.get_or_create(
name="St Paul City Council",
website="https://www.stpaul.gov/calendar",
last_ran = datetime.now(),
# Fetch or create the Scraper row for this script.
# Only `name` identifies the scraper; every other field goes in `defaults`, so
# it is applied when the row is created but takes no part in the lookup.
# Passing last_ran=datetime.now() as a lookup kwarg can never match a stored
# row, which forced a create attempt (and the fallback below) on every run.
# NOTE(review): assumes Scraper.name is unique, as the fallback
# get(name=...) implies -- confirm against the model.
try:
    scraper, created = Scraper.objects.get_or_create(
        name="St Paul City Council",
        defaults={
            "website": "https://www.stpaul.gov/calendar",
            "calendar": Calendar.objects.get(shortcode='msp'),
            "items": 0,
            "new_items": 0,
            "last_ran": datetime.now(),
        },
    )
except Exception as e:
    # Fallback kept from the original: report the error and fetch by name.
    print(e)
    scraper = Scraper.objects.get(name="St Paul City Council")
print("Scraper: ", scraper)
tz = pytz.timezone("US/Central")
@@ -54,14 +62,15 @@ def getEvents(br):
print('\n\n++++\n\n')
venue, created = Organization.objects.get_or_create(name="Somewhere in St Paul", city="St. Paul")
new_event = Event.objects.update_or_create(
calendar = 'msp'
event_type = 'Gv',
show_title = title,
show_link = link,
show_date = datetime.strptime(dateTime, DATETIME_FORMAT),
show_day = datetime.strptime(dateTime, DATETIME_FORMAT),
venue = venue
venue = venue,
scraper = scraper
)
# update_or_create returns an (object, created) pair and the call above stores
# it un-unpacked in new_event, so hand the Event object itself to add_calendar.
add_calendar(new_event[0] if isinstance(new_event, tuple) else new_event, 'msp')
getEvents(br)
sleep(5)

View File

@@ -8,11 +8,13 @@ from pprint import pprint as ppr
import pytz
import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
sys.path.append('../../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'config.django.local'
django.setup()
from events.models import Event as DSEvent, Organization, Scraper
from events.models import Event as DSEvent, Organization, Scraper, Calendar
import events.digitools as digitools
td = relativedelta.relativedelta(hours=5)
odt = datetime.now() + td
@@ -27,7 +29,9 @@ try:
scraper, created = Scraper.objects.get_or_create(
name=venue.name,
website=venue.website,
calendar = Calendar.objects.get(shortcode='msp'),
items = 0,
new_items = 0,
last_ran = datetime.now(),
)
except Exception as e:
@@ -78,7 +82,6 @@ for component in gcal.walk():
if event['dateStart'] > now_now:
print(event['strSummary'])
new_event, created = DSEvent.objects.update_or_create(
calendar = 'msp'
event_type = event_type,
show_title = event['strSummary'],
show_link = venue.website,
@@ -87,6 +90,7 @@ for component in gcal.walk():
more_details = event["details"],
venue = venue
)
digitools.add_calendar(new_event, 'msp')
scraper.items+=1
if event['strLocation'] != None and event['strLocation'] != 'MN' and event['strLocation'] != 'Online':
loc = event['strLocation'].split('@')