moved scrapers into app_dir events

This commit is contained in:
2025-10-11 03:55:16 -05:00
parent 187c2d1cd7
commit 1adc692756
67 changed files with 0 additions and 0 deletions

View File

@@ -0,0 +1,99 @@
import re, os, sys
from datetime import datetime
import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()
from events.models import Event, Organization, Scraper
from time import sleep
from pprint import pprint as ppr
from selenium import webdriver as wd
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By
from xvfbwrapper import Xvfb
from lxml import html
import pytz
from events.digitools import getBrowser, createURL, createBasicEvent, getSource
try:
scraper, created = Scraper.objects.get_or_create(
name="Mpls City Council",
website="https://lims.minneapolismn.gov/Calendar/citycouncil/upcoming",
items = 0,
last_ran = datetime.now(),
)
except Exception as e:
print(e)
scraper = Scraper.objects.get(name="Mpls City Council")
print("Scraper: ", scraper)
tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%A, %b %d, %Y %I:%M %p'
calendar_url = 'https://lims.minneapolismn.gov/Calendar/citycouncil/upcoming'
if len(sys.argv) >= 2:
arg1 = sys.argv[1]
br = getBrowser(arg1)
else:
print("No run_env")
quit()
br.get(calendar_url)
sleep(25)
# br.find_element(By.XPATH, '//*/li[@class="tab-header-small"]/a').click()
# sleep(15)
# all_entries = Select(br.find_element(By.XPATH, '//*/select'))
# all_entries.select_by_value('50')
# sleep(15)
ps = html.fromstring(br.page_source)
dayBlocks = ps.xpath('.//*/div[@class="ng-scope"]')
meetings = []
for dB in dayBlocks:
date = dB.xpath('.//div[@class="row"]/div/span[@class="ng-binding"]/text()')[0]
events = dB.xpath('.//div[@class="upcoming ng-scope"]/div')
for event in events:
time = event.xpath('.//div/text()')[0]
title = event.xpath('.//div/a/text()')[0].strip()
if not len(title) > 0:
title = event.xpath('.//div/span/a/text()')[0].strip()
link = event.xpath('.//div/a/@href')[0]
if link.startswith("/Download/"):
link = calendar_url
else:
link = "https://lims.minneapolismn.gov" + link
location = title.split(',')[-1].strip()
mtg_title = title.split(',')[:-1]
if len(mtg_title) > 1:
mtg_title = (' -').join(mtg_title).strip()
else:
mtg_title = mtg_title[0].strip()
dateTime = datetime.strptime(date + " " + time, DATETIME_FORMAT)
if location == "City Hall":
location = "Mpls City Hall"
print(dateTime, location, mtg_title, link)
print('\n\n++++\n\n')
venue, created = Organization.objects.get_or_create(name=location, city="Minneapolis")
new_event = Event.objects.update_or_create(
calendar = 'msp'
event_type = 'Gv',
show_title = mtg_title,
show_link = link,
show_date = dateTime,
show_day = dateTime,
venue = venue
)
scraper.items+=1
br.close()
scraper.save()
# br.find_element_by_class_name('fc-btn_allCalendars-button').click()