"""Scrape upcoming Minneapolis City Council meetings into the events database.

Usage: run with one positional argument (the run environment), which is
handed to ``getBrowser`` to obtain the browser driver.  The script loads the
LIMS "upcoming" calendar page, parses the rendered HTML, and stores one
``Event`` per listed meeting, counting stored items on the ``Scraper`` row.
"""
import re, os, sys
from datetime import datetime

import django

# Django must be configured before any model can be imported.
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from events.models import Event, Organization, Scraper

from time import sleep
from pprint import pprint as ppr

from selenium import webdriver as wd
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By

from xvfbwrapper import Xvfb
from lxml import html
import pytz

from events.digitools import getBrowser, createURL, createBasicEvent, getSource

# NOTE(review): every field here is a lookup argument (no ``defaults=``), so
# an existing row only matches if ``items`` and ``last_ran`` match as well.
# Presumably the create attempt then fails on a uniqueness constraint and the
# fallback below fetches the existing row by name -- confirm against the model.
try:
    scraper, created = Scraper.objects.get_or_create(
        name="Mpls City Council",
        website="https://lims.minneapolismn.gov/Calendar/citycouncil/upcoming",
        items=0,
        last_ran=datetime.now(),
    )
except Exception as e:
    print(e)
    scraper = Scraper.objects.get(name="Mpls City Council")
print("Scraper: ", scraper)

# NOTE(review): ``tz`` is defined but the parsed datetimes below are stored
# naive; confirm whether they should be localized to US/Central.
tz = pytz.timezone("US/Central")

DATETIME_FORMAT = '%A, %b %d, %Y %I:%M %p'

calendar_url = 'https://lims.minneapolismn.gov/Calendar/citycouncil/upcoming'

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = getBrowser(arg1)
else:
    print("No run_env")
    # sys.exit() instead of the interactive-only quit(); exit status unchanged.
    sys.exit()

try:
    br.get(calendar_url)
    # Fixed wait for the page to finish rendering before reading its source.
    sleep(25)

    # Disabled alternative: switch tabs and raise the page size to 50 entries.
    # br.find_element(By.XPATH, '//*/li[@class="tab-header-small"]/a').click()
    # sleep(15)
    # all_entries = Select(br.find_element(By.XPATH, '//*/select'))
    # all_entries.select_by_value('50')
    # sleep(15)

    ps = html.fromstring(br.page_source)

    dayBlocks = ps.xpath('.//*/div[@class="ng-scope"]')

    for dB in dayBlocks:
        date_nodes = dB.xpath('.//div[@class="row"]/div/span[@class="ng-binding"]/text()')
        if not date_nodes:
            # Not every matching div carries a date heading; skip those
            # instead of crashing on an empty result.
            continue
        date_text = date_nodes[0]

        events = dB.xpath('.//div[@class="upcoming ng-scope"]/div')
        for event in events:
            try:
                time_text = event.xpath('.//div/text()')[0]
                title = event.xpath('.//div/a/text()')[0].strip()
                if not title:
                    title = event.xpath('.//div/span/a/text()')[0].strip()
                link = event.xpath('.//div/a/@href')[0]
                dateTime = datetime.strptime(date_text + " " + time_text, DATETIME_FORMAT)
            except (IndexError, ValueError) as e:
                # A row missing an expected node or with an unparsable
                # date/time is skipped so it cannot abort the whole run.
                print("Skipping unparsable entry: ", e)
                continue

            # Download links are replaced by the calendar page itself; any
            # other href is treated as site-relative.
            if link.startswith("/Download/"):
                link = calendar_url
            else:
                link = "https://lims.minneapolismn.gov" + link

            # The text after the last comma is used as the location and
            # everything before it as the meeting title.
            *name_parts, location = title.split(',')
            location = location.strip()
            if not name_parts:
                # No comma means no title/location split is possible.
                print("Skipping entry without a location: ", title)
                continue
            if len(name_parts) > 1:
                mtg_title = (' -').join(name_parts).strip()
            else:
                mtg_title = name_parts[0].strip()

            if location == "City Hall":
                location = "Mpls City Hall"

            print(dateTime, location, mtg_title, link)
            print('\n\n++++\n\n')

            venue, created = Organization.objects.get_or_create(name=location, city="Minneapolis")
            Event.objects.update_or_create(
                calendar='msp',
                event_type='Gv',
                show_title=mtg_title,
                show_link=link,
                show_date=dateTime,
                show_day=dateTime,
                venue=venue,
            )
            scraper.items += 1
finally:
    # Always release the browser, even when scraping fails part-way.
    br.close()

scraper.save()
# br.find_element_by_class_name('fc-btn_allCalendars-button').click()