# Files
# scrapers/Working/govt/mn.stp.citycouncil.py

# 82 lines
# 2.4 KiB
# Python
# Raw Normal View History

# 2025-11-30 16:29:50 -05:00
import re, os, sys
from datetime import datetime
# 2026-01-09 22:49:19 -05:00
sys.path.append('/var/www/digisnaxx.ado/scrapers')
import dtss
dtss.getReady()
# 2025-11-30 16:29:50 -05:00
from events.models import Event, Organization, Scraper, Calendar
from time import sleep
from pprint import pprint as ppr
from selenium import webdriver as wd
from selenium.webdriver.common.by import By
from xvfbwrapper import Xvfb
from lxml import html
import pytz
from events.digitools import getBrowser, createURL, createBasicEvent, getSource, add_calendar
# Register this scraper, or load its existing row, so that scraped events can
# be attributed to it.
#
# Only `name` is used as the lookup key. Values that change from run to run
# (`last_ran=datetime.now()`, the counters) are passed through `defaults`:
# given as lookup arguments they can never match a stored row, so every run
# would attempt a fresh INSERT and depend on the exception fallback below.
# NOTE(review): assumes `name` identifies a single Scraper row, as the
# fallback get() below already does.
try:
    scraper, created = Scraper.objects.get_or_create(
        name="St Paul City Council",
        defaults={
            "website": "https://www.stpaul.gov/calendar",
            "calendar": Calendar.objects.get(shortcode='msp'),
            "items": 0,
            "new_items": 0,
            "last_ran": datetime.now(),
        },
    )
except Exception as e:
    # Best-effort fallback: report the problem (for example the 'msp'
    # calendar lookup failing) and reuse the row that is already stored.
    print(e)
    scraper = Scraper.objects.get(name="St Paul City Council")
print("Scraper: ", scraper)
# US Central timezone object; not referenced elsewhere in the visible code.
tz = pytz.timezone("US/Central")
# strptime format for the date text read from each calendar row,
# e.g. "January 05, 2026 at 03:30 PM".
DATETIME_FORMAT = '%B %d, %Y at %I:%M %p'
calendar_url = 'https://www.stpaul.gov/calendar'
# Prepended to the site-relative hrefs found in calendar rows.
city_site = "https://www.stpaul.gov"

# The first command-line argument is handed to getBrowser(); without it there
# is no browser to drive, so stop with a non-zero status. sys.exit() is used
# because quit() is an interactive-shell helper installed by the `site`
# module and is not meant for programs.
if len(sys.argv) < 2:
    print("No run_env")
    sys.exit(1)
arg1 = sys.argv[1]
br = getBrowser(arg1)

# Load the first calendar page.
br.get(calendar_url)
sleep(3)  # presumably gives the page time to finish rendering — TODO confirm
def getEvents(br):
    """Store every event listed on the calendar page currently loaded in *br*.

    Parses ``br.page_source`` and, for each ``calendar__item`` row, reads the
    title, link and date text, records an ``Event`` of type ``'Gv'`` for the
    module-level ``scraper`` and passes the result to ``add_calendar`` with
    the ``'msp'`` shortcode.

    Rows that lack one of the expected fields, or whose date text does not
    match ``DATETIME_FORMAT``, are reported and skipped so that one malformed
    row does not abort the rest of the page.

    Args:
        br: Browser object exposing ``page_source``.

    Returns:
        None.
    """
    ps = html.fromstring(br.page_source)
    eventBlocks = ps.xpath('.//*/div[@class="calendar__item views-row"]')
    if not eventBlocks:
        # Nothing listed on this page; avoid touching the database at all.
        return

    # Every event is attached to the same placeholder venue, so fetch it once
    # instead of once per row.
    venue, created = Organization.objects.get_or_create(name="Somewhere in St Paul", city="St. Paul")

    for eB in eventBlocks:
        try:
            title = eB.xpath('.//div/h3[@class="field-content calendar__title"]/text()')[0]
            link = city_site + eB.xpath('.//div/span[@class="field-content calendar__link"]/a/@href')[0]
            dateTime = eB.xpath('.//div[@class="views-field views-field-field-calendar-date-value"]/span/text()')[0]
            # Parse once and reuse the value for both date fields.
            when = datetime.strptime(dateTime.strip(), DATETIME_FORMAT)
        except (IndexError, ValueError) as e:
            # IndexError: an expected element is missing from the row.
            # ValueError: the date text is not in DATETIME_FORMAT.
            print("Skipping calendar row:", e)
            continue

        print(dateTime, title, link)
        print('\n\n++++\n\n')
        new_event = Event.objects.update_or_create(
            event_type='Gv',
            show_title=title,
            show_link=link,
            show_date=when,
            show_day=when,
            venue=venue,
            scraper=scraper,
        )
        # NOTE(review): update_or_create returns an (object, created) tuple and
        # that tuple is passed on unchanged — confirm add_calendar expects it.
        add_calendar(new_event, 'msp')
# Scrape the page that is already loaded, then the "?page=1" page of the
# calendar. The browser is closed in `finally` so that a failed page load,
# parse or database write does not leave it running.
try:
    getEvents(br)
    sleep(5)
    br.get("https://www.stpaul.gov/calendar?page=1")
    getEvents(br)
finally:
    br.close()
# br.find_element_by_class_name('fc-btn_allCalendars-button').click()