lots of updates :/

2025-11-30 15:34:08 -05:00
parent 61e198ff55
commit e06a5d49f8
84 changed files with 6614 additions and 2297 deletions

File diff suppressed because it is too large

File diff suppressed because it is too large

@@ -0,0 +1,169 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta
import json
from selenium.webdriver.common.by import By
from lxml import html
import django
sys.path.append('../../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'config.django.local'
django.setup()
from time import sleep
from pprint import pprint as ppr
import pytz
from events.models import Organization, Scraper, Calendar, Event
import events.digitools as digitools
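# Organization record for this source; get_or_create keeps repeat runs idempotent.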
venue, created = Organization.objects.get_or_create(
name="Events Medellin",
city="Medellin",
website="https://eventario.co/events-category/social/",
is_venue=True
)
scraper, item_count_start, virtcal = digitools.getScraper(venue, 'mde')
DATETIME_FORMAT = '%B %d %Y %I:%M%p'
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'
# with open('data.json') as f:
# totalLinks = json.load(f)
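# Collect event cards from one category listing: load the page, click through the
# pagination link until it disappears, then parse each card into a dict with
# link, title, venue, venueLink, and label.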
def getLinks(br, url, links):
    br.get(url)
    sleep(2)
    br.execute_script("window.scrollTo(0, window.scrollY + 1500)")
    sleep(2)
    # Keep clicking the pagination link and scrolling until it is no longer found.
    while True:
        try:
            div = br.find_element(By.XPATH, ".//*/div[@class='feed-pagination flexify']/a")
            div.click()
            sleep(2)
            br.execute_script("window.scrollTo(0, window.scrollY + 1100)")
            sleep(2)
        except Exception:
            break
    ps = html.fromstring(br.page_source)
    newLinks = []
    events = ps.xpath('.//*/div[@class="e-con-inner"]')
    for event in events:
        e = {}
        try:
            e['link'] = event.xpath('.//*/a/@href')[0]
            e['title'] = event.xpath('.//*/h3/a/text()')[0]
            e['venue'] = event.xpath('.//*/ul/li/a/text()')[-1].replace('\n', '').replace('\t', '')
            e['venueLink'] = event.xpath('.//*/ul/li/a/@href')[1]
            label = event.xpath('.//*/li[@class="elementor-repeater-item-46edd7d flexify ts-action"]/div/text()')
            e['label'] = ''.join([x.replace('\t', '').replace('\n', '') for x in label]).strip()
            newLinks.append(e)
        except Exception as err:
            # Skip cards that are missing any of the expected fields.
            print("Error: ", err)
    return links + newLinks
if len(sys.argv) >= 2:
    # The first CLI argument names the run environment handed to digitools.getBrowser.
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    sys.exit(1)
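# Category listing pages to crawl on eventario.co.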
urls = [
    "https://eventario.co/events-category/social/",
    "https://eventario.co/events-category/musica/",
    "https://eventario.co/events-category/cultura/",
]
allLinks = []
for url in urls:
    allLinks = getLinks(br, url, allLinks)
# Deduplicate events that appear in more than one category, keyed by title.
totalLinks = list({v['title']: v for v in allLinks}.values())
ppr(len(totalLinks))
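# Visit each event's detail page and scrape its date/time strings, location,
# and address from the page markup.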
for event in totalLinks:
    br.get(event['link'])
    sleep(2)
    ps = html.fromstring(br.page_source)
    dateTime = ps.xpath('.//*/div[@class="elementor-element elementor-element-d9beb21 elementor-widget elementor-widget-heading"]/span[@class="elementor-heading-title elementor-size-default"]/text()')
    # Drop the first three characters and anything after a '-' (an end time), then trim.
    event['dateTime'] = [x[3:].split('-')[0].strip() for x in dateTime]
    locations = ps.xpath('.//*/div[@class="elementor-element elementor-element-f04aae3 elementor-widget__width-initial elementor-widget-mobile__width-initial elementor-widget elementor-widget-ts-advanced-list"]/*/li[@class="elementor-repeater-item-138dbed flexify ts-action"]/a/text()')
    location = [x.replace('\t', '').replace('\n', '') for x in locations]
    if len(location) == 2:
        # Two entries: join them in reverse order (second first).
        event['location'] = "{0}, {1}".format(location[1], location[0])
    else:
        try:
            event['location'] = location[0]
        except IndexError:
            event['location'] = 'n/a'
    address = ps.xpath('.//*/ul[@class="flexify simplify-ul ts-advanced-list"]/li[@class="elementor-repeater-item-842568d flexify ts-action"]/div/text()')
    try:
        # The same list also carries capacity and price entries; keep the first plain address line.
        event['address'] = [x for x in address if 'Capacidad' not in x and '$' not in x][0]
    except IndexError:
        event['address'] = address
    sleep(2)
br.close()
data = totalLinks
print("Set:", len(totalLinks))
paisa = []
for d in data:
    if len(d['dateTime']) != 0:
        if 'Familia' in d['label']:
            d['category'] = 'Ot'
        elif 'Comedia' in d['label']:
            d['category'] = 'Co'
        elif ('Magic' in d['title']) or ('Juegos' in d['label']):
            d['category'] = 'Ot'
        elif ('Conferencias' in d['label']) or ('Intercambio' in d['label']):
            d['category'] = 'Ed'
        else:
            d['category'] = 'Mu'
        # The city is the first part of the "city, department" location string.
        if "Antioquia" in d['location']:
            try:
                d['city'] = d['location'].split(',')[0]
                paisa.append(d)
            except Exception:
                continue
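# Persist the filtered events: parse the first date string, get or create the
# venue Organization, and upsert an Event linked to the 'mde' Calendar.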
cal = Calendar.objects.get(shortcode='mde')
for d in paisa:
    # Parse the first scraped date string as a naive datetime using DATETIME_FORMAT.
    d['dateStamp'] = datetime.strptime(d['dateTime'][0], DATETIME_FORMAT)
    try:
        nvenue, created = Organization.objects.get_or_create(
            name=d['venue'],
            city=d['city'],
            website=d['venueLink'],
            address_complete=d['address'],
            is_venue=True
        )
    except Exception:
        # If get_or_create fails (e.g. a uniqueness clash with an existing Organization),
        # fall back to fetching it by name and refreshing its address.
        nvenue = Organization.objects.get(name=d['venue'])
        nvenue.address_complete = d['address']
        nvenue.save()
    new_event, created = Event.objects.update_or_create(
        event_type=d['category'],
        show_title=d['title'],
        show_link=d['link'],
        show_date=d['dateStamp'],
        show_day=d['dateStamp'],
        scraper=scraper,
        venue=nvenue
    )
    new_event.calendar.add(cal)
    new_event.save()
    print(new_event)
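# Finalize the Scraper record for this run via the digitools helper.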
digitools.updateScraper(scraper, item_count_start)