"""Scrape the Arena Wien (Vienna) programme pages and store the events.

Usage: pass the run environment as the first command-line argument; it is
forwarded to ``digitools.getBrowser`` to obtain the browser used for
fetching pages. The current and the following month are scraped.
"""
import os
import sys
from datetime import datetime, timedelta
from dateutil import relativedelta

# NOTE(review): the project imports further down presumably depend on this
# path entry and on dtss.getReady() having run first -- keep this order.
sys.path.append('/var/www/digisnaxx.ado/scrapers')
import dtss
dtss.getReady()

from time import sleep
from pprint import pprint as ppr
import pytz

from selenium.webdriver.common.by import By

from events.models import Organization, Scraper, Calendar, Event
import events.digitools as digitools

# NOTE(review): fixed +0100 offset; Vienna observes DST, so summer events are
# presumably one hour off -- confirm before changing stored values.
tz_str = "+0100 UTC"
DATETIME_FORMAT = '%B %d %Y %H:%M %z %Z'

# Programme page URL; the two placeholders are month number and year.
PROGRAMME_URL = (
    "https://arena.wien/Home/Programm"
    "#data_abonnement=-1&data_month={}&data_year={}"
)

venue, created = Organization.objects.get_or_create(
    name="Arena Wien",
    city="Vienna",
    website="https://arena.wien/Home/Programm",
    is_venue=True,
)

scraper, item_count_start, virtcal = digitools.getScraper(
    venue, venue.website, 'vie'
)
scraper.items = 0
scraper.save()


def getSite(br, website):
    """Fetch ``website`` with browser ``br`` and store its events as "Mu".

    Sleeps three seconds afterwards before returning.
    """
    ps = digitools.getSource(br, website)
    get_events(ps, "Mu")
    sleep(3)


def get_events(ps, event_type):
    """Create one event per calendar row found in the parsed page ``ps``.

    ``ps`` must support ``.xpath``; ``event_type`` is forwarded to
    ``digitools.createBasicEvent``. A row that cannot be parsed is reported
    on stdout and skipped, so one bad row does not abort the page.
    """
    rows = ps.xpath('.//*/div[@class="suite_calRowContainer "]')
    for row in rows:
        # Defined before the try so the error handler can always print it.
        event = {}
        try:
            date_parts = row.xpath(
                './/*/span[@class="suite_datePlate"]/span/text()'
            )
            time_parts = row.xpath(
                './/*/span[@class="col-md-2 suite_EvenTime"]/span/text()'
            )
            # The third text node carries the time used for the event.
            start_time = time_parts[2].replace("\n", "").strip()

            day = date_parts[0].strip()
            # Third date node looks like "<month>.|<year>".
            month_year = date_parts[2].split("|")
            month = month_year[0].replace(".", "").strip()
            year = month_year[1].strip()
            month = digitools.translateMonth(month)

            event['date'] = ' '.join([month, day, year, start_time, tz_str])
            event['scraper'] = scraper
            event['calendars'] = [scraper.calendar]
            event['title'] = row.xpath(
                './/*/span[@class="Event_H1"]/text()'
            )[0]
            try:
                event['dateStamp'] = datetime.strptime(
                    event['date'], DATETIME_FORMAT
                )
            except ValueError as err:
                # Best effort: the event is still created without dateStamp.
                print(err)
            event['link'] = row.xpath('.//a/@href')[0]
            digitools.createBasicEvent(event, event_type, venue)
            scraper.items += 1
        except Exception as err:
            # Per-row boundary: report what was gathered and move on.
            print("\nError: ", err)
            ppr(event)


if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    # No browser exists yet, so there is nothing to close here.
    print("No run_env")
    sys.exit(1)

# Take the clock once so month and year cannot disagree, and take the year
# from the shifted date so a December run asks for January of the next year.
now = datetime.now()
following = now + relativedelta.relativedelta(months=1)

try:
    for when in (now, following):
        getSite(br, PROGRAMME_URL.format(when.month, when.year))
    digitools.updateScraper(scraper, item_count_start)
finally:
    # Always release the browser, even if scraping fails part-way.
    br.close()