"""Scrape the St Paul city calendar and store its entries as Event rows.

Usage: run with one argument, the run environment passed to ``getBrowser``.
The first two pages of https://www.stpaul.gov/calendar are scraped.
"""
import re
import os
import sys
from datetime import datetime

import django

# The project root has to be on sys.path and the settings module selected
# before django.setup() runs and before any project model is imported.
sys.path.append('../../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'config.django.local'
django.setup()

from events.models import Event, Organization, Scraper, Calendar

from time import sleep
from pprint import pprint as ppr
from selenium import webdriver as wd
from selenium.webdriver.common.by import By

from xvfbwrapper import Xvfb
from lxml import html
import pytz

from events.digitools import getBrowser, createURL, createBasicEvent, getSource, add_calendar

# NOTE(review): every keyword here, including last_ran=datetime.now(), is used
# as a lookup field, so an existing row is unlikely to ever match and a create
# is attempted on each run; the except branch then falls back to a lookup by
# name. Moving the non-identifying fields into ``defaults=`` looks like the
# intended form -- confirm against the Scraper model's uniqueness constraints.
try:
    scraper, created = Scraper.objects.get_or_create(
        name="St Paul City Council",
        website="https://www.stpaul.gov/calendar",
        calendar=Calendar.objects.get(shortcode='msp'),
        items=0,
        new_items=0,
        last_ran=datetime.now(),
    )
except Exception as e:
    print(e)
    scraper = Scraper.objects.get(name="St Paul City Council")
print("Scraper: ", scraper)

tz = pytz.timezone("US/Central")

DATETIME_FORMAT = '%B %d, %Y at %I:%M %p'

calendar_url = 'https://www.stpaul.gov/calendar'
city_site = "https://www.stpaul.gov"

# Seconds to wait after navigation before reading the page source.
PAGE_RENDER_WAIT = 3
# Seconds to pause between consecutive page requests.
PAGE_REQUEST_GAP = 5


def getEvents(br):
    """Parse the page currently loaded in *br* and store each calendar item.

    For every ``calendar__item`` block the title, link and date text are
    read, the date text is parsed with ``DATETIME_FORMAT``, and an ``Event``
    is created or updated and attached to the 'msp' calendar.

    Items that lack one of the expected nodes, or whose date text does not
    match ``DATETIME_FORMAT``, are reported on stdout and skipped so that one
    malformed entry does not abort the rest of the page.

    Args:
        br: browser object exposing ``page_source`` (as returned by
            ``getBrowser``).
    """
    page = html.fromstring(br.page_source)
    event_blocks = page.xpath('.//*/div[@class="calendar__item views-row"]')
    if not event_blocks:
        return

    # All events share one placeholder venue, so resolve it once per page
    # instead of once per event.
    venue, _venue_created = Organization.objects.get_or_create(
        name="Somewhere in St Paul",
        city="St. Paul",
    )

    for block in event_blocks:
        try:
            title = block.xpath('.//div/h3[@class="field-content calendar__title"]/text()')[0]
            link = city_site + block.xpath('.//div/span[@class="field-content calendar__link"]/a/@href')[0]
            date_text = block.xpath('.//div[@class="views-field views-field-field-calendar-date-value"]/span/text()')[0]
        except IndexError:
            print("Skipping calendar item with a missing title, link or date")
            continue

        print(date_text, title, link)
        print('\n\n++++\n\n')

        try:
            # Parsed once and reused for both show_date and show_day.
            # NOTE(review): the result is a naive datetime; ``tz`` is defined
            # above but not applied here -- confirm whether it should be.
            starts_at = datetime.strptime(date_text, DATETIME_FORMAT)
        except ValueError as err:
            print("Skipping event with unparseable date:", err)
            continue

        # update_or_create returns an (object, created) pair; only the Event
        # instance is handed on to add_calendar.
        new_event, _event_created = Event.objects.update_or_create(
            event_type='Gv',
            show_title=title,
            show_link=link,
            show_date=starts_at,
            show_day=starts_at,
            venue=venue,
            scraper=scraper,
        )
        add_calendar(new_event, 'msp')


if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = getBrowser(arg1)
else:
    print("No run_env")
    sys.exit()

page_urls = (calendar_url, calendar_url + "?page=1")

try:
    for page_number, page_url in enumerate(page_urls):
        if page_number:
            sleep(PAGE_REQUEST_GAP)
        br.get(page_url)
        sleep(PAGE_RENDER_WAIT)
        getEvents(br)
finally:
    # Release the browser even when a page fails to load or parse.
    br.close()

# br.find_element_by_class_name('fc-btn_allCalendars-button').click()