"""Scraper script for the Sahan Journal website.

Usage: pass the run environment as the first command-line argument; it is
forwarded unchanged to ``getBrowser``.  The script loads the organization's
website, hands each article's title and link to ``createBasicArticle`` and
finally records the run time on the ``Scraper`` row.
"""

import os
import sys
from datetime import datetime
from dateutil import relativedelta

import django

# Django has to be configured before anything from ``events`` is imported.
# NOTE(review): the relative path presumably points at the project root from
# the directory the script is launched in -- confirm against the deployment.
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from events.models import Organization, Scraper
from events.digitools import getBrowser, createURL, createBasicArticle, getSource

# Captured once so the value stored on creation and the value stored at the
# end of the run are the same timestamp.
run_started = datetime.now()

# ``last_ran`` lives in ``defaults`` so it is NOT part of the lookup: used as
# a lookup key, a fresh timestamp can never match the existing row.
scraper, created = Scraper.objects.get_or_create(
    name="Sahan Journal",
    website="https://sahanjournal.com/",
    defaults={"last_ran": run_started},
)

org, created = Organization.objects.get_or_create(
    name="Sahan Journal",
    city="Minneapolis",
    website="https://sahanjournal.com/",
    is_venue=False,
)

event_type = "Ja"

# Time Signatures
tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%b %d %Y %I:%M %p'
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'


def get_events(ps, event_type):
    """Store the articles found in a parsed page.

    Every ``<article>`` element below ``ps`` is inspected; the text and
    ``href`` of its ``h2.entry-title`` link are passed, together with
    ``event_type`` and the module-level ``org``, to ``createBasicArticle``.

    Args:
        ps: Parsed page exposing an ``xpath`` method (as returned by
            ``getSource``).
        event_type: Type code forwarded to ``createBasicArticle``.

    Processing stops once more than 10 articles have been stored.  A failure
    on a single article is printed and skipped so that one malformed entry
    does not abort the whole run.
    """
    count = 0
    for c in ps.xpath('.//*/article'):
        if count > 10:
            # Leave the loop instead of terminating the process, so the
            # caller still closes the browser and saves the scraper record.
            break
        # Bound before the ``try`` so the handler below can always print it.
        article = {}
        try:
            article['title'] = c.xpath('.//*/h2[@class="entry-title"]/a/text()')[0]
            article['link'] = c.xpath('.//*/h2[@class="entry-title"]/a/@href')[0]
            createBasicArticle(article, event_type, org)
            count += 1
        except Exception as e:
            # Deliberate best effort: report the problem and move on.
            print(e)
            ppr(article)
            print("\n+++\n")


if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = getBrowser(arg1)
else:
    print("No run_env")
    sys.exit()

try:
    ps = getSource(br, org.website)
    get_events(ps, "Ed")
    sleep(3)
finally:
    # Release the browser even when loading or parsing fails.
    br.close()

# Record this run on the row, whether it was just created or already existed.
scraper.last_ran = run_started
scraper.save()