more scrapers

This commit is contained in:
2026-01-24 19:01:47 -05:00
parent 7013d8327a
commit 3c4a41ae2c
58 changed files with 1614 additions and 2988 deletions

View File

@@ -0,0 +1,86 @@
import os, sys
from datetime import datetime, timedelta
from dateutil import relativedelta
# Extend the import path with the deployment's scraper directory before
# importing `dtss` (presumably that is where the module lives — confirm).
sys.path.append('/var/www/digisnaxx.ado/scrapers')
import dtss
# NOTE(review): presumably bootstraps the Django environment so that the
# `events` imports below succeed — confirm against dtss.getReady().
dtss.getReady()
from time import sleep
from pprint import pprint as ppr
import pytz
from selenium.webdriver.common.by import By
# Project models and scraping helpers; these are imported only after
# dtss.getReady() has been called above.
from events.models import Organization, Scraper, Calendar, Event
import events.digitools as digitools
# Fixed UTC offset and zone name appended to every scraped date string.
# NOTE(review): the offset is constant, so dates that fall in Central
# European Summer Time (+0200) would be stamped one hour off — confirm
# whether downstream code compensates for this.
tz_str = "+0100 UTC"

# strptime pattern for the assembled "<Month> <day> <year> <HH:MM> <offset> <zone>"
# strings built in get_events().
DATETIME_FORMAT = '%B %d %Y %H:%M %z %Z'

# Fetch the venue record, creating it on the first run.
_VENUE_FIELDS = {
    "name": "Arena Wien",
    "city": "Vienna",
    "website": "https://arena.wien/Home/Programm",
    "is_venue": True,
}
venue, created = Organization.objects.get_or_create(**_VENUE_FIELDS)

# Obtain the scraper record for this venue together with its previous item
# count, then reset and persist the per-run counter.
scraper, item_count_start, virtcal = digitools.getScraper(
    venue, venue.website, 'vie'
)
scraper.items = 0
scraper.save()
def getSite(br, website):
    """Load one programme page and import the events found on it.

    Args:
        br: Browser object handed to ``digitools.getSource``.
        website: URL of the programme page to load.
    """
    page_source = digitools.getSource(br, website)
    # Events from this venue are always imported under the "Mu" type code.
    get_events(page_source, "Mu")
    # Fixed three-second pause once the page has been processed.
    sleep(3)
def get_events(ps, event_type):
    """Extract every event row from a parsed programme page and store it.

    Each calendar row is turned into an event dict (date string, parsed
    timestamp, title, link, scraper and calendars) and handed to
    ``digitools.createBasicEvent``. ``scraper.items`` is incremented for
    every row that is stored without raising.

    Args:
        ps: Parsed page source; must provide an ``xpath()`` method.
        event_type: Event type code forwarded to
            ``digitools.createBasicEvent`` (the caller passes ``"Mu"``).

    A row that cannot be processed is reported on stdout together with the
    partially built event dict and then skipped; the remaining rows are
    still processed.
    """
    rows = ps.xpath('.//*/div[@class="suite_calRowContainer "]')
    for row in rows:
        # Created outside the try block so the error handler can always
        # print whatever was collected before the failure.
        event = {}
        try:
            date_parts = row.xpath('.//*/span[@class="suite_datePlate"]/span/text()')
            time_parts = row.xpath('.//*/span[@class="col-md-2 suite_EvenTime"]/span/text()')

            # The third text node of the time column holds the start time.
            start_time = time_parts[2].replace("\n", "").strip()

            # date_parts[0] is the day; date_parts[2] is "<month>|<year>".
            day = date_parts[0].strip()
            month_raw, year_raw = date_parts[2].split("|")[:2]
            month = digitools.translateMonth(month_raw.replace(".", "").strip())
            year = year_raw.strip()

            event['date'] = ' '.join([month, day, year, start_time, tz_str])
            event['scraper'] = scraper
            event['calendars'] = [scraper.calendar]
            event['title'] = row.xpath('.//*/span[@class="Event_H1"]/text()')[0]
            try:
                event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
            except ValueError as e:
                # Best effort: report the unparsable date and carry on
                # without a dateStamp, as before.
                print(e)
            event['link'] = row.xpath('.//a/@href')[0]

            # Honour the caller-supplied type instead of a hard-coded "Mu".
            digitools.createBasicEvent(event, event_type, venue)
            scraper.items += 1
        except Exception as e:
            # Per-row boundary: one malformed row must not abort the run.
            print("\nError: ", e)
            ppr(event)
# --- Script entry point ---------------------------------------------------
# The first command-line argument is the run environment passed to
# digitools.getBrowser(); without it no browser can be created.
if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    # No browser was opened on this path, so there is nothing to close
    # (the previous br.close() here raised NameError).
    sys.exit(1)

try:
    # Sample the clock once so month and year cannot disagree if the run
    # happens to straddle a month boundary.
    now = datetime.now()
    month = now.month
    year = now.year

    # Take BOTH month and year from the shifted date: in December the
    # following month is January of the next year.
    odt_next_month = now + relativedelta.relativedelta(months=1)
    next_month = odt_next_month.month
    next_year = odt_next_month.year

    url_template = "https://arena.wien/Home/Programm#data_abonnement=-1&data_month={}&data_year={}"
    website = url_template.format(str(month), str(year))
    website_2 = url_template.format(str(next_month), str(next_year))

    # Scrape the current month, then the following one.
    getSite(br, website)
    getSite(br, website_2)

    digitools.updateScraper(scraper, item_count_start)
finally:
    # Always release the browser, even when scraping fails part-way.
    br.close()