# Install Chromedriver and Quarantine
# xattr -d com.apple.quarantine
"""Scrape the Minnesota Legislature combined calendar into Event rows.

Usage: run with one positional argument (the run environment), which is
handed to ``getBrowser`` to pick the browser configuration.

The script loads the calendar page in a browser, parses the rendered HTML
with lxml, and stores one ``Event`` per House, Senate and committee card.
Each stored event is attached to the 'msp' calendar and counted on the
``Scraper`` row, which is saved at the end of a successful run.
"""

import os
import sys
from datetime import datetime
from dateutil import relativedelta

import django

# Django must be configured before any project model can be imported.
sys.path.append('../../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'config.django.local'
django.setup()

from time import sleep
from pprint import pprint as ppr
from selenium import webdriver as wd

from xvfbwrapper import Xvfb
from lxml import html
import pytz

from events.models import Event, Organization, Scraper, Calendar
from events.digitools import getBrowser, createURL, createBasicEvent, getSource, add_calendar

# NOTE(review): last_ran is part of the lookup, so this lookup is unlikely to
# ever match an existing row; when the create is rejected (or the calendar
# lookup fails) the except branch falls back to fetching the scraper by name.
# Moving the non-identifying fields into `defaults=` would be cleaner, but
# depends on the model's uniqueness constraints -- confirm before changing.
try:
    scraper, created = Scraper.objects.get_or_create(
        name="MN Legislature",
        website="https://www.leg.mn.gov/cal?type=all",
        calendar=Calendar.objects.get(shortcode='msp'),
        items=0,
        new_items=0,
        last_ran=datetime.now(),
    )
except Exception as e:
    print(e)
    scraper = Scraper.objects.get(name="MN Legislature")
print("Scraper: ", scraper)

# NOTE(review): tz is defined but never applied; the parsed datetimes below
# are stored naive. Confirm whether they should be localized to US/Central.
tz = pytz.timezone("US/Central")

DATETIME_FORMAT = '%A, %B %d, %Y %I:%M %p'
BASE_URL = "https://www.leg.mn.gov/"
CARD_XPATH = './/*/div[@class="card border-dark {kind}_item cal_item ml-lg-3"]'

# Set initial variables for City, etc
calendar_url = 'https://www.leg.mn.gov/cal?type=all'


def _first(node, path):
    """Return the first XPath result for *path* under *node*.

    Raises IndexError when the path matches nothing.
    """
    return node.xpath(path)[0]


def _card_title(card):
    """Return the card heading, preferring the linked text over plain text."""
    try:
        return _first(card, './/*/h3/a/text()')
    except IndexError:
        return _first(card, './/*/h3/text()')


def _card_link(card, link_class):
    """Return the card's link.

    The href inside the div with class *link_class* is prefixed with the site
    base URL; when that div is absent the heading href is returned unchanged.
    """
    try:
        return BASE_URL + _first(card, './/*/div[@class="%s"]/a/@href' % link_class)
    except IndexError:
        return _first(card, './/*/h3/a/@href')


def _house_location(card):
    """Return the text of the first div with an empty class attribute."""
    return _first(card, './/*/div[@class=""]/text()')


def _labelled_location(card, label, literals=()):
    """Return the text node that follows *label* among the card's text nodes.

    When *label* is absent, the first of *literals* that appears verbatim is
    returned instead. Raises LookupError when nothing matches (the original
    code failed with a KeyError at this point and aborted the whole run).
    """
    texts = card.xpath('.//*/text()')
    if label in texts:
        return texts[texts.index(label) + 1]
    for literal in literals:
        if literal in texts:
            return literal
    raise LookupError("no %r entry found" % label)


def _senate_location(card):
    """Return a Senate card's location, or the floor-session marker."""
    return _labelled_location(card, 'Location: ', ('Senate Floor Session',))


def _committee_location(card):
    """Return a committee card's room."""
    return _labelled_location(card, 'Room: ')


def _store_events(cards, venue_name, link_class, find_location):
    """Store one Event per card and attach it to the 'msp' calendar.

    cards         -- lxml elements, one per calendar card
    venue_name    -- Organization name used as the venue for these cards
    link_class    -- class attribute of the div holding the card's link
    find_location -- callable(card) returning the location text

    A card missing a required piece (date, title, link, location) or carrying
    an unparseable date is reported and skipped instead of ending the run.
    """
    venue = None
    for card in cards:
        try:
            when = datetime.strptime(_first(card, './/*/b/text()'), DATETIME_FORMAT)
            title = _card_title(card)
            link = _card_link(card, link_class)
            location = find_location(card)
        except (LookupError, ValueError) as err:
            # IndexError and KeyError are both LookupErrors; ValueError comes
            # from strptime when the date text does not match DATETIME_FORMAT.
            print("Skipping %s card: %r" % (venue_name, err))
            continue

        # Fetched on first use so no venue row is created when there are no
        # usable cards, matching the original per-event lookup.
        if venue is None:
            venue, _ = Organization.objects.get_or_create(name=venue_name, city="St. Paul")

        # NOTE(review): every field is a lookup argument (no `defaults=`), so
        # a card whose details change presumably yields a new row rather than
        # updating the earlier one -- confirm this is intended.
        event, _ = Event.objects.update_or_create(
            event_type='Gv',
            show_title=title,
            show_link=link,
            show_date=when,
            show_day=when.date(),
            more_details=location,
            venue=venue,
            scraper=scraper,
        )
        add_calendar(event, 'msp')
        scraper.items += 1


if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = getBrowser(arg1)
else:
    print("No run_env")
    # sys.exit() rather than quit(): quit() is a helper injected by the site
    # module for interactive use. The exit status is unchanged.
    sys.exit()

# Only the rendered page source is needed, so the browser is released as soon
# as it has been captured -- including when loading or parsing fails.
try:
    br.get(calendar_url)
    sleep(10)  # fixed wait before reading the page source
    ps = html.fromstring(br.page_source)
finally:
    br.close()

commEvents = ps.xpath(CARD_XPATH.format(kind="comm"))
senateEvents = ps.xpath(CARD_XPATH.format(kind="senate"))
houseEvents = ps.xpath(CARD_XPATH.format(kind="house"))

_store_events(houseEvents, "MN House", "float-right text-center mr-2 d-print-none", _house_location)
_store_events(senateEvents, "MN Senate", "float-right text-center mr-2", _senate_location)
_store_events(commEvents, "MN Legislature", "float-right text-center mr-2", _committee_location)

scraper.save()