"""Scraper for event cards on the Vienna city website (https://www.wien.gv.at/).

Run as a script with one command-line argument; that argument is handed to
``digitools.getBrowser`` to obtain the browser used for scraping.
"""
import os
import sys
from datetime import datetime, timedelta
from dateutil import relativedelta

# The shared scraper helpers live outside the default import path, so the
# directory has to be added before ``dtss`` can be imported.
sys.path.append('/var/www/digisnaxx.ado/scrapers')
import dtss
# NOTE(review): presumably bootstraps the project environment so that the
# ``events`` imports below succeed -- confirm against dtss.
dtss.getReady()

from time import sleep
from pprint import pprint as ppr
import pytz
from selenium.webdriver.common.by import By

from events.models import Organization, Scraper, Calendar, Event
import events.digitools as digitools

# Fixed UTC offset and zone name appended to every scraped date string so it
# matches the trailing "%z %Z" of DATETIME_FORMAT.
tz_str = "+0100 UTC"
DATETIME_FORMAT = '%B %d %Y %H:%M %z %Z'

venue, created = Organization.objects.get_or_create(
    name="City Site",
    city="Vienna",
    website="https://www.wien.gv.at/",
    is_venue=True,
)

scraper, item_count_start, virtcal = digitools.getScraper(venue, venue.website, 'vie')
# Reset the per-run counter; get_events() increments it for each created event.
scraper.items = 0
scraper.save()


def getSite(br, website):
    """Load ``website`` with browser ``br`` and create events from its cards.

    Args:
        br: Browser object as returned by ``digitools.getBrowser``.
        website: Full URL of the listing page to scrape.
    """
    ps = digitools.getSource(br, website)
    get_events(ps, "Mu")
    sleep(3)


def get_events(ps, event_type):
    """Create one event for every ``wm-card`` element found in ``ps``.

    A failure on a single card is printed together with the partially built
    event dict and the card is skipped, so one bad entry does not abort the
    whole run.

    Args:
        ps: Page source as returned by ``digitools.getSource``; it must
            support ``.xpath()``.
        event_type: Event type code passed on to
            ``digitools.createBasicEvent`` (the caller in this file
            passes ``"Mu"``).
    """
    for card in ps.xpath('.//*/wm-card'):
        # Bound before the try so the error handler can always print it.
        event = {}
        try:
            date = card.xpath('.//*/ul/li/text()')
            # TODO(review): the parsing below is commented out, so ``month``,
            # ``day``, ``year`` and ``time`` are not bound anywhere in this
            # file as shown. Building event['date'] therefore raises
            # NameError and every card ends up in the handler below. The
            # intended parsing of ``date`` for this site needs to be
            # restored/adapted.
            # time = c.xpath('.//*/span[@class="col-md-2 suite_EvenTime"]/span/text()')
            # time = [x.replace("\n", "").strip() for x in time][2]
            # day, month, year = date[0].strip(), date[2].split("|")[0].replace(".", "").strip(), date[2].split("|")[1].strip()
            # month = digitools.translateMonth(month)
            event['date'] = ' '.join([month, day, year, time, tz_str])
            event['scraper'] = scraper
            event['calendars'] = [scraper.calendar]
            event['title'] = card.xpath('.//*/h3/a/text()')[0]
            try:
                event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
            except (ValueError, TypeError) as e:
                # Best effort: an unparsable date is reported but the event
                # is still handed on without a 'dateStamp' entry.
                print(e)
            event['link'] = card.xpath('.//a/@href')[0]
            # Use the caller-supplied type instead of a hard-coded "Mu".
            digitools.createBasicEvent(event, event_type, venue)
            scraper.items += 1
        except Exception as e:
            # Per-card boundary: report the problem and move on to the next card.
            print("\nError: ", e)
            ppr(event)
            # print("\n+++\n")


if len(sys.argv) < 2:
    # No browser has been opened on this path, so there is nothing to close;
    # exit with a non-zero status because the required argument is missing.
    print("No run_env")
    sys.exit(1)

arg1 = sys.argv[1]
br = digitools.getBrowser(arg1)

try:
    site_append = "veranstaltungen/suche?q=&facet={}&page={}".format('this month', 1)
    website = venue.website + site_append

    getSite(br, website)
    # getSite(br,website_2)

    digitools.updateScraper(scraper, item_count_start)
finally:
    # Always release the browser, even when scraping or the update fails.
    br.close()