lots of updates :/
This commit is contained in:
@@ -3,8 +3,8 @@ from datetime import datetime
|
||||
from dateutil import relativedelta
|
||||
|
||||
import django
|
||||
sys.path.append('../../../')
|
||||
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
|
||||
sys.path.append('../../../../')
|
||||
os.environ['DJANGO_SETTINGS_MODULE'] = 'config.django.local'
|
||||
django.setup()
|
||||
|
||||
from time import sleep
|
||||
@@ -22,7 +22,8 @@ venue, created = Organization.objects.get_or_create(
|
||||
is_venue = True
|
||||
)
|
||||
|
||||
scraper,item_count_start = digitools.getScraper(venue)
|
||||
scraper,item_count_start,virtcal = digitools.getScraper(venue, 'msp')
|
||||
|
||||
|
||||
tz = pytz.timezone("US/Central")
|
||||
|
||||
@@ -40,7 +41,7 @@ def get_events(ps, event_type):
|
||||
if month == "Jan":
|
||||
year = int(year) + 1
|
||||
event['scraper'] = scraper
|
||||
event['calendar'] = scraper.calendar
|
||||
event['calendars'] = [scraper.calendar]
|
||||
event['title'] = c.xpath('.//*/span[@class="event_title"]/a/text()')[0]
|
||||
event['date'] = [month, day, str(year), c.xpath('.//*/span[@class="event_time"]/text()')[0].strip()]
|
||||
event['date'] = " ".join(event['date'])
|
||||
|
||||
@@ -3,8 +3,8 @@ from datetime import datetime
|
||||
from dateutil import relativedelta
|
||||
|
||||
import django
|
||||
sys.path.append('../../../')
|
||||
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
|
||||
sys.path.append('../../../../')
|
||||
os.environ['DJANGO_SETTINGS_MODULE'] = 'config.django.local'
|
||||
django.setup()
|
||||
|
||||
from time import sleep
|
||||
@@ -21,7 +21,7 @@ venue, created = Organization.objects.get_or_create(
|
||||
is_venue=True
|
||||
)
|
||||
|
||||
scraper,item_count_start = digitools.getScraper(venue)
|
||||
scraper,item_count_start,virtcal = digitools.getScraper(venue, 'msp')
|
||||
|
||||
DATETIME_FORMAT = '%B %d %Y %I:%M%p'
|
||||
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'
|
||||
@@ -35,7 +35,7 @@ def get_events(ps):
|
||||
month = c.xpath('.//*/div[@class="date-month"]/text()')[0]
|
||||
year = datetime.now().year
|
||||
event['scraper'] = scraper
|
||||
event['calendar'] = scraper.calendar
|
||||
event['calendars'] = [scraper.calendar]
|
||||
event['title'] = c.xpath('.//div/h4/a/text()')[0]
|
||||
event['date'] = [month, day, str(year), c.xpath('.//div[@class="event-info"]/p/text()')[0].split(" ")[0]]
|
||||
event['date'] = " ".join(event['date'])
|
||||
|
||||
@@ -1,70 +0,0 @@
|
||||
import os, sys
|
||||
from datetime import datetime
|
||||
from dateutil import relativedelta
|
||||
|
||||
import django
|
||||
sys.path.append('../../../')
|
||||
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
|
||||
django.setup()
|
||||
|
||||
from time import sleep
|
||||
from pprint import pprint as ppr
|
||||
import pytz
|
||||
|
||||
from events.models import Organization, Scraper
|
||||
import events.digitools as digitools
|
||||
|
||||
current_year = str(datetime.now().year)
|
||||
|
||||
venue, created = Organization.objects.get_or_create(
|
||||
name="Eastside Freedom Library",
|
||||
city="Minneapolis",
|
||||
website="https://eastsidefreedomlibrary.org/events/",
|
||||
is_venue=True
|
||||
)
|
||||
|
||||
scraper,item_count_start = digitools.getScraper(venue)
|
||||
|
||||
tz = pytz.timezone("US/Central")
|
||||
|
||||
DATETIME_FORMAT = '%B %d @ %I:%M %p %Y'
|
||||
|
||||
def get_events(ps):
|
||||
contents = ps.xpath('.//*/article')
|
||||
# ppr("contents:", contents)
|
||||
for c in contents:
|
||||
try:
|
||||
event = {}
|
||||
event['scraper'] = scraper
|
||||
event['calendar'] = scraper.calendar
|
||||
event['title'] = c.xpath('.//*/h3[@class="tribe-events-calendar-list__event-title tribe-common-h6 tribe-common-h4--min-medium"]/a/text()')[0].strip()
|
||||
event['link'] = c.xpath('.//*/h3[@class="tribe-events-calendar-list__event-title tribe-common-h6 tribe-common-h4--min-medium"]/a/@href')[0]
|
||||
event['date'] = c.xpath('.//*/span[@class="tribe-event-date-start"]/text()')[0].strip() + " " + current_year
|
||||
event['dateStamp'] =datetime.strptime(event['date'], DATETIME_FORMAT)
|
||||
try:
|
||||
new_event = digitools.createBasicEvent(event, "Ed", venue)
|
||||
scraper.items+=1
|
||||
except Exception as e:
|
||||
print(e)
|
||||
ppr(event)
|
||||
print("\n+++\n")
|
||||
except Exception as e:
|
||||
print(e)
|
||||
|
||||
if len(sys.argv) >= 2:
|
||||
arg1 = sys.argv[1]
|
||||
br = digitools.getBrowser(arg1)
|
||||
else:
|
||||
print("No run_env")
|
||||
quit()
|
||||
|
||||
calendar_url = 'https://eastsidefreedomlibrary.org/events/'
|
||||
|
||||
ps = digitools.getSource(br, calendar_url)
|
||||
|
||||
get_events(ps)
|
||||
|
||||
# ppr(events)
|
||||
br.close()
|
||||
|
||||
digitools.updateScraper(scraper, item_count_start)
|
||||
@@ -3,8 +3,8 @@ from datetime import datetime
|
||||
from dateutil import relativedelta
|
||||
|
||||
import django
|
||||
sys.path.append('../../../')
|
||||
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
|
||||
sys.path.append('../../../../')
|
||||
os.environ['DJANGO_SETTINGS_MODULE'] = 'config.django.local'
|
||||
django.setup()
|
||||
|
||||
from time import sleep
|
||||
@@ -23,7 +23,7 @@ venue, created = Organization.objects.get_or_create(
|
||||
is_venue = True
|
||||
)
|
||||
|
||||
scraper,item_count_start = digitools.getScraper(venue)
|
||||
scraper,item_count_start,virtcal = digitools.getScraper(venue, 'msp')
|
||||
|
||||
tz = pytz.timezone("US/Central")
|
||||
|
||||
@@ -93,7 +93,7 @@ events = []
|
||||
def get_info(pse):
|
||||
event = {}
|
||||
event['scraper'] = scraper
|
||||
event['calendar'] = scraper.calendar
|
||||
event['calendars'] = [scraper.calendar]
|
||||
event["venue"] = pse.xpath('.//*/div[@class="content"]/div/div[@class="venue_name"]/text()')[0].replace('\t', '').replace('\n', '').strip()
|
||||
event["show_title"] = pse.xpath('.//*/span[@class="show_title"]/text()')[0].replace('\t', '').replace('\n', '')
|
||||
if event["show_title"] == "":
|
||||
@@ -164,7 +164,7 @@ for show in shows:
|
||||
try:
|
||||
event['dateStamp'] = event['date_time']
|
||||
event['scraper'] = scraper
|
||||
new_event, created = digitools.createDetailedEvent(event, "Mu", venue)
|
||||
new_event, created = digitools.createDetailedEvent(event, "Mu", venue, scraper)
|
||||
scraper.items+=1
|
||||
except Exception as e:
|
||||
print("event creation error: ", e, "\n\n", event, "\n\n", created)
|
||||
|
||||
@@ -3,8 +3,8 @@ from datetime import datetime
|
||||
from dateutil import relativedelta
|
||||
|
||||
import django
|
||||
sys.path.append('../../../')
|
||||
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
|
||||
sys.path.append('../../../../')
|
||||
os.environ['DJANGO_SETTINGS_MODULE'] = 'config.django.local'
|
||||
django.setup()
|
||||
|
||||
from time import sleep
|
||||
@@ -21,7 +21,7 @@ venue, created = Organization.objects.get_or_create(
|
||||
is_venue = True
|
||||
)
|
||||
|
||||
scraper,item_count_start = digitools.getScraper(venue)
|
||||
scraper,item_count_start,virtcal = digitools.getScraper(venue, 'msp')
|
||||
|
||||
event_type = ""
|
||||
|
||||
@@ -41,7 +41,7 @@ def get_events(ps, event_type):
|
||||
if month == "January":
|
||||
year = int(year) + 1
|
||||
event['scraper'] = scraper
|
||||
event['calendar'] = scraper.calendar
|
||||
event['calendars'] = [scraper.calendar]
|
||||
event['title'] = c.xpath('.//*/h3/a/text()')[0].replace("\n", "").replace("\t", "")
|
||||
event['date'] = " ".join([ dateTime, str(year)])
|
||||
event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
|
||||
|
||||
@@ -3,8 +3,8 @@ from datetime import datetime
|
||||
from dateutil import relativedelta
|
||||
|
||||
import django
|
||||
sys.path.append('../../../')
|
||||
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
|
||||
sys.path.append('../../../../')
|
||||
os.environ['DJANGO_SETTINGS_MODULE'] = 'config.django.local'
|
||||
django.setup()
|
||||
|
||||
from time import sleep
|
||||
@@ -22,7 +22,7 @@ venue, created = Organization.objects.get_or_create(
|
||||
is_venue = True
|
||||
)
|
||||
|
||||
scraper,item_count_start = digitools.getScraper(venue)
|
||||
scraper,item_count_start,virtcal = digitools.getScraper(venue, 'msp')
|
||||
|
||||
event_type = "Mu"
|
||||
|
||||
@@ -43,7 +43,7 @@ def get_events(ps, event_type):
|
||||
# if month == "Jan":
|
||||
# year = int(year) + 1
|
||||
event['scraper'] = scraper
|
||||
event['calendar'] = scraper.calendar
|
||||
event['calendars'] = [scraper.calendar]
|
||||
event['title'] = c.xpath('.//*/div[@class="vp-event-name"]/text()')[0]
|
||||
event['datetime'] = date + " " + str(year) + " " + time
|
||||
event['dateStamp'] = datetime.strptime(event['datetime'], DATETIME_FORMAT)
|
||||
|
||||
@@ -3,8 +3,8 @@ from datetime import datetime
|
||||
from dateutil import relativedelta
|
||||
|
||||
import django
|
||||
sys.path.append('../../../')
|
||||
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
|
||||
sys.path.append('../../../../')
|
||||
os.environ['DJANGO_SETTINGS_MODULE'] = 'config.django.local'
|
||||
django.setup()
|
||||
|
||||
from time import sleep
|
||||
@@ -26,7 +26,7 @@ venue, created = Organization.objects.get_or_create(
|
||||
is_venue=True,
|
||||
)
|
||||
|
||||
scraper,item_count_start = digitools.getScraper(venue)
|
||||
scraper,item_count_start,virtcal = digitools.getScraper(venue, 'msp')
|
||||
ppr(scraper)
|
||||
|
||||
tz = pytz.timezone("US/Central")
|
||||
@@ -65,7 +65,7 @@ def get_listings(pse, events):
|
||||
e['image'] = event.xpath('.//*/img/@data-src')[0]
|
||||
e["date_time"] = datetime.strptime(e['datetime'], DATETIME_FORMAT)
|
||||
e['scraper'] = scraper
|
||||
e['calendar'] = scraper.calendar
|
||||
e['calendar'] = [scraper.calendar]
|
||||
events.append(e)
|
||||
|
||||
events = []
|
||||
@@ -79,7 +79,6 @@ for cal in calendar_url:
|
||||
for event in events:
|
||||
try:
|
||||
new_event = Event.objects.update_or_create(
|
||||
calendar = event['calendar'],
|
||||
scraper = event['scraper'],
|
||||
event_type = 'Mu',
|
||||
show_title = event["show_title"],
|
||||
@@ -89,6 +88,7 @@ for event in events:
|
||||
guests = " ".join(event["subtitle"]),
|
||||
venue = venue
|
||||
)
|
||||
digitools.add_calendar(new_event, 'msp')
|
||||
except Exception as e:
|
||||
print("oops ", e, "\n\n", "Scraper:", scraper)
|
||||
|
||||
|
||||
@@ -3,8 +3,8 @@ from datetime import datetime
|
||||
from dateutil import relativedelta
|
||||
|
||||
import django
|
||||
sys.path.append('../../../')
|
||||
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
|
||||
sys.path.append('../../../../')
|
||||
os.environ['DJANGO_SETTINGS_MODULE'] = 'config.django.local'
|
||||
django.setup()
|
||||
|
||||
from time import sleep
|
||||
@@ -21,7 +21,7 @@ venue, created = Organization.objects.get_or_create(
|
||||
is_venue=False
|
||||
)
|
||||
|
||||
scraper,item_count_start = digitools.getScraper(venue)
|
||||
scraper,item_count_start,virtcal = digitools.getScraper(venue, 'msp')
|
||||
|
||||
DATETIME_FORMAT = '%A, %B %d , %Y %I:%M %p'
|
||||
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'
|
||||
@@ -35,7 +35,7 @@ def get_events(ps, event_type):
|
||||
month = c.xpath('.//*/div[@class="month"]/text()')[0]
|
||||
year = c.xpath('.//*/div[@class="year"]/text()')[0]
|
||||
event['scraper'] = scraper
|
||||
event['calendar'] = scraper.calendar
|
||||
event['calendars'] = [scraper.calendar]
|
||||
event['title'] = c.xpath('.//*/h3/text()')[0]
|
||||
event['date'] = [month, day, year, c.xpath('.//*/p[@class="time"]/text()')[0]]
|
||||
event['date'] = " ".join(event['date'])
|
||||
@@ -45,7 +45,7 @@ def get_events(ps, event_type):
|
||||
scraper.items+=1
|
||||
except Exception as e:
|
||||
event['link'] = "https://www.magersandquinn.com" + c.xpath('.//a[@class="event off-site"]/@href')[0]
|
||||
print(e)
|
||||
print("Error: ", e)
|
||||
ppr(event)
|
||||
digitools.createBasicEvent(event, "Ed", venue)
|
||||
print("\n\n+++\n\n")
|
||||
|
||||
@@ -3,8 +3,8 @@ from datetime import datetime
|
||||
from dateutil import relativedelta
|
||||
|
||||
import django
|
||||
sys.path.append('../../../')
|
||||
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
|
||||
sys.path.append('../../../../')
|
||||
os.environ['DJANGO_SETTINGS_MODULE'] = 'config.django.local'
|
||||
django.setup()
|
||||
|
||||
from time import sleep
|
||||
@@ -25,7 +25,7 @@ venue, created = Organization.objects.get_or_create(
|
||||
is_venue = True
|
||||
)
|
||||
|
||||
scraper,item_count_start = digitools.getScraper(venue)
|
||||
scraper,item_count_start, virtcal = digitools.getScraper(venue, 'msp')
|
||||
|
||||
tz = pytz.timezone("US/Central")
|
||||
|
||||
@@ -44,7 +44,7 @@ else:
|
||||
quit()
|
||||
|
||||
br.get(calendar_url)
|
||||
sleep(30)
|
||||
sleep(20)
|
||||
|
||||
def getEvents(br):
|
||||
ps = html.fromstring(br.page_source)
|
||||
@@ -54,19 +54,19 @@ def getEvents(br):
|
||||
dateTime = event.xpath('.//*/span[@class="tribe-event-date-start"]/text()')[0].replace("•", "").strip() + " " + current_year
|
||||
title = event.xpath('.//*/h2[@class="alt-font"]/a/text()')[0].replace("\n", "").replace("\t", "")
|
||||
link = event.xpath('.//*/h2[@class="alt-font"]/a/@href')[0]
|
||||
deets["tickets"] = event.xpath('.//*/span[@class="tribe-events-c-small-cta__price"]/strong/text()')[0]
|
||||
# deets["tickets"] = event.xpath('.//*/span[@class="tribe-events-c-small-cta__price"]/strong/text()')[0]
|
||||
try:
|
||||
new_event = Event.objects.update_or_create(
|
||||
calendar = scraper.calendar,
|
||||
new_event, created = Event.objects.update_or_create(
|
||||
scraper = scraper,
|
||||
event_type = 'Mu',
|
||||
show_title = title,
|
||||
show_link = link,
|
||||
show_date = datetime.strptime(dateTime, DATETIME_FORMAT),
|
||||
show_day = datetime.strptime(dateTime, DATETIME_FORMAT).date(),
|
||||
more_details = deets["tickets"],
|
||||
show_day = datetime.strptime(dateTime, DATETIME_FORMAT),
|
||||
# more_details = deets["tickets"],
|
||||
venue = venue
|
||||
)
|
||||
digitools.add_calendar(new_event, 'msp')
|
||||
scraper.items+=1
|
||||
except Exception as e:
|
||||
print("oops", e)
|
||||
|
||||
@@ -3,8 +3,8 @@ from datetime import datetime
|
||||
from dateutil import relativedelta
|
||||
|
||||
import django
|
||||
sys.path.append('../../../')
|
||||
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
|
||||
sys.path.append('../../../../')
|
||||
os.environ['DJANGO_SETTINGS_MODULE'] = 'config.django.local'
|
||||
django.setup()
|
||||
|
||||
from time import sleep
|
||||
@@ -25,7 +25,7 @@ try:
|
||||
except Exception as e:
|
||||
venue = Organization.objects.get(name="Parkway Theater")
|
||||
|
||||
scraper,item_count_start = digitools.getScraper(venue)
|
||||
scraper,item_count_start, virtcal = digitools.getScraper(venue, 'msp')
|
||||
|
||||
tz = pytz.timezone("US/Central")
|
||||
|
||||
@@ -38,7 +38,7 @@ def get_events(ps, event_type):
|
||||
for c,i in zip(contents,img_etc):
|
||||
try:
|
||||
event = {}
|
||||
event['calendar'] = scraper.calendar
|
||||
event['calendars'] = [scraper.calendar]
|
||||
event['title'] = c.xpath('.//*/a[@class="summary-title-link"]/text()')[0]
|
||||
event['link'] = "https://theparkwaytheater.com" + c.xpath('.//*/a[@class="summary-title-link"]/@href')[0]
|
||||
event['date'] = c.xpath('.//div/div/time/text()')[0] + " " + c.xpath('.//*/span[@class="event-time-12hr"]/text()')[0].split("–")[0].strip()
|
||||
@@ -51,8 +51,7 @@ def get_events(ps, event_type):
|
||||
}
|
||||
|
||||
try:
|
||||
new_event = DSEvent.objects.update_or_create(
|
||||
calendar = scraper.calendar,
|
||||
new_event, created = DSEvent.objects.update_or_create(
|
||||
scraper = scraper,
|
||||
event_type = event_type,
|
||||
show_title = event['title'],
|
||||
@@ -62,13 +61,13 @@ def get_events(ps, event_type):
|
||||
more_details = event["details"],
|
||||
venue = venue
|
||||
)
|
||||
digitools.add_calendar(new_event, 'msp')
|
||||
scraper.items+=1
|
||||
except Exception as e:
|
||||
try:
|
||||
event['date'] = c.xpath('.//div/div/time/text()')[0].split("–")[0] + " " + c.xpath('.//*/span[@class="event-time-12hr"]/text()')[0].split("–")[0].strip()
|
||||
event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
|
||||
new_event = DSEvent.objects.update_or_create(
|
||||
calendar = scraper.calendar,
|
||||
new_event, created = DSEvent.objects.update_or_create(
|
||||
scraper = scraper,
|
||||
event_type = event_type,
|
||||
show_title = event['title'],
|
||||
@@ -78,6 +77,7 @@ def get_events(ps, event_type):
|
||||
more_details = event["details"],
|
||||
venue = venue
|
||||
)
|
||||
digitools.add_calendar(new_event, 'msp')
|
||||
scraper.items+=1
|
||||
except Exception as e:
|
||||
print(e)
|
||||
|
||||
@@ -3,10 +3,9 @@ from datetime import datetime
|
||||
from dateutil import relativedelta
|
||||
|
||||
import django
|
||||
sys.path.append('../../../')
|
||||
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
|
||||
sys.path.append('../../../../')
|
||||
os.environ['DJANGO_SETTINGS_MODULE'] = 'config.django.local'
|
||||
django.setup()
|
||||
|
||||
from time import sleep
|
||||
from pprint import pprint as ppr
|
||||
import pytz
|
||||
@@ -22,7 +21,7 @@ venue, created = Organization.objects.get_or_create(
|
||||
is_venue = False
|
||||
)
|
||||
|
||||
scraper,item_count_start = digitools.getScraper(venue)
|
||||
scraper,item_count_start, virtcal = digitools.getScraper(venue, 'msp')
|
||||
|
||||
# Time Signatures
|
||||
tz = pytz.timezone("US/Central")
|
||||
@@ -38,7 +37,7 @@ def get_events(ps, event_type):
|
||||
subtitle = ps.xpath('.//*/article/h1/em/text()')
|
||||
event = {}
|
||||
event['scraper'] = scraper
|
||||
event['calendar'] = scraper.calendar
|
||||
event['calendars'] = [scraper.calendar]
|
||||
if len(subtitle) == 1:
|
||||
if len(ntitle) == 2:
|
||||
title = ntitle[0] + subtitle[0] + ntitle[1]
|
||||
|
||||
@@ -3,8 +3,8 @@ from datetime import datetime
|
||||
from dateutil import relativedelta
|
||||
|
||||
import django
|
||||
sys.path.append('../../../')
|
||||
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
|
||||
sys.path.append('../../../../')
|
||||
os.environ['DJANGO_SETTINGS_MODULE'] = 'config.django.local'
|
||||
django.setup()
|
||||
|
||||
from time import sleep
|
||||
@@ -24,7 +24,7 @@ venue, created = Organization.objects.get_or_create(
|
||||
is_venue = True
|
||||
)
|
||||
|
||||
scraper,item_count_start = digitools.getScraper(venue)
|
||||
scraper,item_count_start, virtcal = digitools.getScraper(venue, 'msp')
|
||||
|
||||
|
||||
# Set initial variables for City, etc
|
||||
@@ -42,11 +42,13 @@ else:
|
||||
quit()
|
||||
|
||||
def get_listings(pse, events):
|
||||
listings = pse.xpath('.//*/div[@class="tribe-common-g-row tribe-events-calendar-list__event-row"]')
|
||||
listings = pse.xpath('.//*/li[@class="tribe-common-g-row tribe-events-calendar-list__event-row"]')
|
||||
print("get listings", listings)
|
||||
for l in listings:
|
||||
event = {}
|
||||
event['scraper'] = scraper
|
||||
event['calendar'] = scraper.calendar
|
||||
event['calendars'] = [scraper.calendar]
|
||||
print("1: ", event)
|
||||
try:
|
||||
event["image"] = l.xpath('.//*/img/@src')[0]
|
||||
except:
|
||||
|
||||
@@ -3,8 +3,8 @@ from datetime import datetime
|
||||
from dateutil import relativedelta
|
||||
|
||||
import django
|
||||
sys.path.append('../../../')
|
||||
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
|
||||
sys.path.append('../../../../')
|
||||
os.environ['DJANGO_SETTINGS_MODULE'] = 'config.django.local'
|
||||
django.setup()
|
||||
|
||||
from time import sleep
|
||||
@@ -21,7 +21,7 @@ venue, created = Organization.objects.get_or_create(
|
||||
is_venue=True
|
||||
)
|
||||
|
||||
scraper,item_count_start = digitools.getScraper(venue)
|
||||
scraper,item_count_start,virtcal = digitools.getScraper(venue, 'msp')
|
||||
|
||||
tz = pytz.timezone("US/Central")
|
||||
|
||||
@@ -39,7 +39,7 @@ def get_events(ps):
|
||||
pse = digitools.getSource(br, "https://www.thecedar.org" + l)
|
||||
event = {}
|
||||
event['scraper'] = scraper
|
||||
event['calendar'] = scraper.calendar
|
||||
event['calendars'] = [scraper.calendar]
|
||||
event['link'] = "https://www.thecedar.org" + l
|
||||
try:
|
||||
time = pse.xpath('.//*/time[@class="event-time-localized-start"]/text()')[0]
|
||||
|
||||
@@ -3,8 +3,8 @@ from datetime import datetime
|
||||
from dateutil import relativedelta
|
||||
|
||||
import django
|
||||
sys.path.append('../../../')
|
||||
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
|
||||
sys.path.append('../../../../')
|
||||
os.environ['DJANGO_SETTINGS_MODULE'] = 'config.django.local'
|
||||
django.setup()
|
||||
|
||||
from time import sleep
|
||||
@@ -24,7 +24,7 @@ venue, created = Organization.objects.get_or_create(
|
||||
is_venue=True,
|
||||
)
|
||||
|
||||
scraper,item_count_start = digitools.getScraper(venue)
|
||||
scraper,item_count_start,virtcal = digitools.getScraper(venue, 'msp')
|
||||
|
||||
tz = pytz.timezone("US/Central")
|
||||
|
||||
@@ -88,7 +88,7 @@ for d in dates:
|
||||
event["bands"] = (", ").join(bands)
|
||||
if len(bands) > 0:
|
||||
event['scraper'] = scraper
|
||||
event['calendar'] = scraper.calendar
|
||||
event['calendars'] = [scraper.calendar]
|
||||
event['title'] = event["bands"]
|
||||
event['date'] = event["date_time"]
|
||||
event['dateStamp'] = event["date_time"]
|
||||
|
||||
1295
events/scrapers/Working/venues/manual/data.json
Normal file
1295
events/scrapers/Working/venues/manual/data.json
Normal file
File diff suppressed because it is too large
Load Diff
1471
events/scrapers/Working/venues/manual/data.updated.json
Normal file
1471
events/scrapers/Working/venues/manual/data.updated.json
Normal file
File diff suppressed because it is too large
Load Diff
169
events/scrapers/Working/venues/manual/eventario.py
Normal file
169
events/scrapers/Working/venues/manual/eventario.py
Normal file
@@ -0,0 +1,169 @@
|
||||
import json
import os, sys
from datetime import datetime
from pprint import pprint as ppr
from time import sleep

from dateutil import relativedelta
from lxml import html
import pytz
from selenium.webdriver.common.by import By

import django
# Django must be configured before any project model can be imported.
sys.path.append('../../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'config.django.local'
django.setup()

# Event is required by the persistence loop at the bottom of this script
# (Event.objects.update_or_create); it was previously missing from this import.
from events.models import Organization, Scraper, Calendar, Event
import events.digitools as digitools

# Organization row this scraper is registered under; fetched if it already
# exists, otherwise created with exactly these field values.
venue, created = Organization.objects.get_or_create(
    name="Events Medellin",
    city="Medellin",
    website="https://eventario.co/events-category/social/",
    is_venue=True,
)

# NOTE(review): the three return values are presumably the Scraper row, its
# item count at start-up and a calendar object for 'mde' — confirm against
# digitools.getScraper.
scraper, item_count_start, virtcal = digitools.getScraper(venue, 'mde')

# strptime patterns; DATETIME_FORMAT is the one applied to scraped dates below.
DATETIME_FORMAT = '%B %d %Y %I:%M%p'
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'

# Disabled shortcut kept from the original: load a saved list instead of
# scraping.
# with open('data.json') as f:
#     totalLinks = json.load(f)

def getLinks(br, url, links):
    """Collect the event cards listed on one category page.

    Loads ``url`` in the browser, keeps clicking the pagination anchor until
    it can no longer be found or clicked, then parses the resulting page
    source and builds one dict per event card.

    Args:
        br: Selenium-style browser exposing ``get``, ``execute_script``,
            ``find_element`` and ``page_source``.
        url: Category page to load.
        links: Entries gathered from earlier calls; not modified.

    Returns:
        A new list: ``links`` followed by one dict per card with the keys
        ``link``, ``title``, ``venue``, ``venueLink`` and ``label``. A card
        missing any of those fields is skipped and the error is printed.
    """
    def _strip_layout(text):
        # Remove the tabs/newlines found inside the scraped text nodes.
        return text.replace('\t', '').replace('\n', '')

    br.get(url)
    sleep(2)
    br.execute_script("window.scrollTo(0, window.scrollY + 1500)")
    sleep(2)

    # Keep expanding the feed until the pagination link is gone or unusable.
    # `except Exception` (rather than a bare except) still ends the loop on
    # any browser error but lets Ctrl-C / SystemExit propagate.
    while True:
        try:
            more = br.find_element(By.XPATH, ".//*/div[@class='feed-pagination flexify']/a")
            more.click()
            sleep(2)
            br.execute_script("window.scrollTo(0, window.scrollY + 1100)")
            sleep(2)
        except Exception:
            break

    ps = html.fromstring(br.page_source)
    newLinks = []
    for card in ps.xpath('.//*/div[@class="e-con-inner"]'):
        try:
            venue_names = card.xpath('.//*/ul/li/a/text()')
            label_parts = card.xpath('.//*/li[@class="elementor-repeater-item-46edd7d flexify ts-action"]/div/text()')
            entry = {
                'link': card.xpath('.//*/a/@href')[0],
                'title': card.xpath('.//*/h3/a/text()')[0],
                # The last anchor text in the list is used as the venue name.
                'venue': _strip_layout(venue_names[-1]),
                # The second href in the same list is used as the venue link.
                'venueLink': card.xpath('.//*/ul/li/a/@href')[1],
                'label': ''.join(_strip_layout(part) for part in label_parts).strip(),
            }
        except Exception as err:
            # Best effort: an incomplete card is reported and skipped.
            print("Error: ", err)
            continue
        newLinks.append(entry)

    return links + newLinks


# ---------------------------------------------------------------------------
# Script body: scrape the eventario.co category feeds, visit every event page
# for date/location/address, keep the events located in Antioquia and store
# them on the 'mde' calendar.
# Usage: python eventario.py <run_env>   (run_env is passed to getBrowser)
# ---------------------------------------------------------------------------

DATE_XPATH = './/*/div[@class="elementor-element elementor-element-d9beb21 elementor-widget elementor-widget-heading"]/span[@class="elementor-heading-title elementor-size-default"]/text()'
LOCATION_XPATH = './/*/div[@class="elementor-element elementor-element-f04aae3 elementor-widget__width-initial elementor-widget-mobile__width-initial elementor-widget elementor-widget-ts-advanced-list"]/*/li[@class="elementor-repeater-item-138dbed flexify ts-action"]/a/text()'
ADDRESS_XPATH = './/*/ul[@class="flexify simplify-ul ts-advanced-list"]/li[@class="elementor-repeater-item-842568d flexify ts-action"]/div/text()'


def _category_for(label, title):
    """Return the two-letter event_type code for a scraped label/title."""
    if 'Familia' in label:
        return 'Ot'
    if 'Comedia' in label:
        return 'Co'
    if 'Magic' in title or 'Juegos' in label:
        return 'Ot'
    if 'Conferencias' in label or 'Intercambio' in label:
        return 'Ed'
    return 'Mu'


def _venue_for(d):
    """Fetch or create the Organization an event dict refers to."""
    try:
        org, _ = Organization.objects.get_or_create(
            name=d['venue'],
            city=d['city'],
            website=d['venueLink'],
            address_complete=d['address'],
            is_venue=True
        )
    except Exception:
        # get_or_create failed; fall back to a lookup by name and refresh the
        # address. An empty scraped address must not overwrite a stored one.
        org = Organization.objects.get(name=d['venue'])
        if d['address']:
            org.address_complete = d['address']
            org.save()
    return org


if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    quit()

urls = ["https://eventario.co/events-category/social/", "https://eventario.co/events-category/musica/", "https://eventario.co/events-category/cultura/"]

# The browser is closed even when scraping fails part-way through.
try:
    allLinks = []
    for url in urls:
        allLinks = getLinks(br, url, allLinks)

    # De-duplicate by title; the last card seen for a title wins.
    totalLinks = list({v['title']: v for v in allLinks}.values())
    ppr(len(totalLinks))

    for event in totalLinks:
        br.get(event['link'])
        sleep(2)
        ps = html.fromstring(br.page_source)

        # Drop the first three characters and anything after a '-'.
        # NOTE(review): presumably a short prefix and an end time — verify
        # against the live page markup.
        event['dateTime'] = [x[3:].split('-')[0].strip() for x in ps.xpath(DATE_XPATH)]

        location = [x.replace('\t', '').replace('\n', '') for x in ps.xpath(LOCATION_XPATH)]
        if len(location) == 2:
            event['location'] = "{0}, {1}".format(location[1], location[0])
        elif location:
            event['location'] = location[0]
        else:
            event['location'] = 'n/a'

        # First entry that is neither a capacity nor a price line. Fall back
        # to an empty string, not the raw xpath list, because this value is
        # later written to Organization.address_complete.
        address_lines = [x for x in ps.xpath(ADDRESS_XPATH) if 'Capacidad' not in x and '$' not in x]
        event['address'] = address_lines[0] if address_lines else ''

    sleep(2)
finally:
    br.close()

print("Set:", len(totalLinks))

# Keep only dated events whose location mentions Antioquia.
paisa = []
for d in totalLinks:
    if not d['dateTime']:
        continue
    d['category'] = _category_for(d['label'], d['title'])
    if "Antioquia" in d['location']:
        d['city'] = d['location'].split(',')[0]
        paisa.append(d)

cal = Calendar.objects.get(shortcode='mde')

for d in paisa:
    # One bad date or failed save is reported and skipped, so the remaining
    # events are still stored and updateScraper still runs.
    try:
        d['dateStamp'] = datetime.strptime(d['dateTime'][0], DATETIME_FORMAT)
        nvenue = _venue_for(d)
        new_event, created = Event.objects.update_or_create(
            event_type=d['category'],
            show_title=d['title'],
            show_link=d['link'],
            show_date=d['dateStamp'],
            show_day=d['dateStamp'],
            scraper=scraper,
            venue=nvenue
        )
        new_event.calendar.add(cal)
        new_event.save()
        # Same per-event counter bump the other venue scrapers perform before
        # calling updateScraper.
        scraper.items += 1
        print(new_event)
    except Exception as err:
        print("Error: ", err, "\n\n", d)

digitools.updateScraper(scraper, item_count_start)

Reference in New Issue
Block a user