"""Scrape article headlines from the MinnPost site into the events database.

Usage: run with one command-line argument, which is passed straight to
``getBrowser`` (the script prints "No run_env" and exits without it).

The script registers the MinnPost ``Organization`` and its ``Scraper`` row,
loads ``org.website`` through the browser, and hands the title and link of
each ``<article>`` element found on the page to ``createBasicArticle``.
"""
import os, sys
from datetime import datetime
from dateutil import relativedelta

import django

# Django must be configured before any project model can be imported, so the
# path/settings/setup lines deliberately sit between the import groups.
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from events.models import Organization, Scraper
from events.digitools import getBrowser, createURL, createBasicArticle, getSource

org, created = Organization.objects.get_or_create(
    name="MinnPost",
    city="Minneapolis",
    website="https://www.minnpost.com/",
    is_venue=False,
)

# NOTE(review): every keyword here is used as a lookup, and
# ``last_ran=datetime.now()`` can never match an existing row, so this call
# always attempts an INSERT.  The fallback below presumably relies on a
# uniqueness constraint on the Scraper model - confirm against the model.
try:
    scraper, created = Scraper.objects.get_or_create(
        name=org.name,
        website=org.website,
        items=0,
        last_ran=datetime.now(),
    )
except Exception as e:
    print(e)
    scraper = Scraper.objects.get(name=org.name)
print("Scraper: ", scraper)

# NOTE(review): this module-level value is not used below; get_events() is
# called with "Ed".
event_type = "Ja"

# Time Signatures
tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%b %d %Y %I:%M %p'
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'

# Upper bound on articles handed to createBasicArticle in a single run
# (equivalent to the previous ``count > 10`` check).
MAX_ARTICLES = 11


def get_events(ps, event_type):
    """Create a basic article for each ``<article>`` element found in *ps*.

    Args:
        ps: Parsed page exposing ``.xpath()`` (whatever ``getSource`` returns).
        event_type: Value forwarded unchanged to ``createBasicArticle``.

    Processing stops once ``MAX_ARTICLES`` articles have been created
    successfully.  A failure on one article is printed and skipped so the
    remaining articles are still processed.
    """
    contents = ps.xpath('.//*/article')
    count = 0
    ppr(contents)
    for c in contents:
        if count >= MAX_ARTICLES:
            # Stop here and let the caller close the browser and save the
            # scraper, instead of terminating the interpreter mid-function.
            break
        # Bound before the try so the error handler can always print it.
        article = {}
        try:
            article['title'] = c.xpath('.//*/h2[@class="entry-title"]/a/text()')[0]
            article['link'] = c.xpath('.//*/h2[@class="entry-title"]/a/@href')[0]
            createBasicArticle(article, event_type, org)
            ppr(article)
            print("Success")
            count += 1
        except Exception as e:
            # Best effort: report the failing article and carry on.
            print(e)
            ppr(article)
            print("\n\n+++\n\n")


if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = getBrowser(arg1)
else:
    print("No run_env")
    # sys.exit() rather than quit(): quit() is injected by the ``site``
    # module and is not guaranteed to exist outside interactive sessions.
    sys.exit()

try:
    ps = getSource(br, org.website)
    get_events(ps, "Ed")
    sleep(3)
finally:
    # Always release the browser, even when scraping raises.
    br.close()

scraper.save()