"""Scrape the St. Paul city calendar and store each listing as an Event.

Usage: run with one command-line argument (the run environment), which is
forwarded to ``events.digitools.getBrowser`` to obtain the browser object.
"""
import re
import os
import sys
from datetime import datetime

import django

# The Django project must be importable and configured before any model
# import below, so the order of these statements matters.
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from events.models import Event, Organization, Scraper

from time import sleep
from pprint import pprint as ppr

from selenium import webdriver as wd
from selenium.webdriver.common.by import By

from xvfbwrapper import Xvfb
from lxml import html
import pytz

from events.digitools import getBrowser, createURL, createBasicEvent, getSource

# ``last_ran`` goes in ``defaults`` so it is only used when the row is first
# created. As a lookup field the current timestamp would never match an
# existing row, and every run would try to insert a new Scraper.
scraper, created = Scraper.objects.get_or_create(
    name="St Paul City Council",
    website="https://www.stpaul.gov/calendar",
    defaults={"last_ran": datetime.now()},
)

tz = pytz.timezone("US/Central")

# Format of the date string shown on each calendar item.
DATETIME_FORMAT = '%B %d, %Y at %I:%M %p'

calendar_url = 'https://www.stpaul.gov/calendar'
city_site = "https://www.stpaul.gov"

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = getBrowser(arg1)
else:
    print("No run_env")
    sys.exit()


def getEvents(br):
    """Parse the page currently loaded in ``br`` and save its calendar items.

    Each ``calendar__item`` block on the page yields a title, a link
    (prefixed with ``city_site``) and a date string parsed with
    ``DATETIME_FORMAT``. One ``Event`` is created or matched per item, all
    attached to the placeholder venue "Somewhere in St Paul".

    Items missing a title, link or date, or whose date does not match
    ``DATETIME_FORMAT``, are reported on stdout and skipped so that one
    malformed listing does not abort the rest of the page.

    Args:
        br: Browser object exposing ``page_source`` (as returned by
            ``getBrowser``).
    """
    page = html.fromstring(br.page_source)
    event_blocks = page.xpath('.//*/div[@class="calendar__item views-row"]')
    if not event_blocks:
        return

    # The venue is the same for every item, so look it up once per page
    # rather than once per event.
    venue, _ = Organization.objects.get_or_create(
        name="Somewhere in St Paul",
        city="St. Paul",
    )

    for block in event_blocks:
        try:
            title = block.xpath('.//div/h3[@class="field-content calendar__title"]/text()')[0]
            link = city_site + block.xpath('.//div/span[@class="field-content calendar__link"]/a/@href')[0]
            date_text = block.xpath('.//div[@class="views-field views-field-field-calendar-date-value"]/span/text()')[0]
        except IndexError:
            print("Skipping calendar item with a missing title, link or date")
            continue

        print(date_text, title, link)
        print('\n\n++++\n\n')

        try:
            show_date = datetime.strptime(date_text, DATETIME_FORMAT)
        except ValueError:
            print("Skipping calendar item with an unparseable date:", date_text)
            continue

        # No ``defaults`` are given, so every field takes part in the lookup:
        # an identical event is matched, anything else is created.
        Event.objects.update_or_create(
            calendar='msp',
            event_type='Gv',
            show_title=title,
            show_link=link,
            show_date=show_date,
            show_day=show_date,
            venue=venue,
        )


try:
    br.get(calendar_url)
    sleep(3)  # give the page time to finish rendering before reading it
    getEvents(br)

    sleep(5)
    br.get(f"{calendar_url}?page=1")
    getEvents(br)
finally:
    # Close the browser even if scraping raised part-way through.
    br.close()