Files
api.digisnaxx/event_scrapers/zArchive/broken/cedar.mpls.py
2025-10-11 03:50:49 -05:00

64 lines
1.8 KiB
Python

import os, sys
from datetime import datetime
from dateutil import relativedelta
import django
sys.path.append('../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()
from time import sleep
from pprint import pprint as ppr
from selenium import webdriver as wd
from xvfbwrapper import Xvfb
from lxml import html
import pytz
from events.models import Event as DSEvent, Organization
from digitools import getBrowser, createBasicEvent, getSource
venue, created = Organization.objects.get_or_create(
name="Cedar Cultural Center",
city="Minneapolis",
website="https://www.thecedar.org/listing",
)
tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%A, %B %d, %Y %I:%M %p'
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'
DATETIME_FORMAT_3 = '%A, %B %d at %I:%M%p %Y'
DATETIME_FORMAT_4 = '%A, %B %d at %I%p %Y'
DATETIME_FORMAT_5 = '%A, %B %d @%I%p %Y'
def get_events(ps):
links = ps.xpath('.//*/div[@class="summary-title"]/a/@href')
# ppr("contents:", contents)
for l in links:
br.get("https://www.thecedar.org" + l)
sleep(3)
pse = html.fromstring(br.page_source)
event = {}
time = pse.xpath('.//*/time[@class="event-time-12hr-start"]/text()')[0]
date = pse.xpath('.//*/time[@class="event-date"]/text()')[0]
event['title'] = pse.xpath('.//*/h1[@class="eventitem-title"]/text()')[0]
dateStamp = date + " " + time
event['dateStamp'] = datetime.strptime(dateStamp, DATETIME_FORMAT)
event['link'] = "https://www.thecedar.org" + l
createBasicEvent(event, "Mu", venue)
if len(sys.argv) >= 2:
arg1 = sys.argv[1]
br = getBrowser(arg1)
else:
print("No run_env")
quit()
calendar_url = 'https://www.thecedar.org/listing'
ps = getSource(br, calendar_url)
get_events(ps)
# ppr(events)
br.close()