DoD Contracts, Maps and Austria
This commit is contained in:
72
Working/venues/getMaps.py
Normal file
72
Working/venues/getMaps.py
Normal file
@@ -0,0 +1,72 @@
|
||||
import os, sys
|
||||
from datetime import datetime, timedelta
|
||||
from dateutil import relativedelta
|
||||
from pprint import pprint as ppr
|
||||
|
||||
sys.path.append('/var/www/digisnaxx.ado/scrapers')
|
||||
import dtss
|
||||
dtss.getReady()
|
||||
|
||||
from time import sleep
|
||||
from pprint import pprint as ppr
|
||||
import pytz
|
||||
|
||||
from selenium.webdriver.common.by import By
|
||||
|
||||
from events.models import Organization, Scraper, Calendar, Event
|
||||
import events.digitools as digitools
|
||||
|
||||
import re
|
||||
|
||||
def getSite(br, website, org, base_url="https://www.google.com", wait=5):
    """Scrape a Google results page for a Maps link and store it on ``org``.

    Args:
        br: Selenium browser handle, passed through to digitools.getSource.
        website: URL of the Google search page to load and parse.
        org: events.models.Organization instance; its ``barrio`` field is
            set to the first Maps link found and then saved.
        base_url: origin joined onto the relative href found on the page
            (default preserves the previously hard-coded Google origin).
        wait: seconds to sleep after processing the page (scrape rate-limit).
    """
    ps = digitools.getSource(br, website)
    # hrefs in this results container are relative, e.g. "/maps/place/...".
    links = ps.xpath('.//*/div[@class="luibr"]/div/div/a/@href')
    ppr(links)
    if links:  # idiomatic emptiness test instead of len(links) > 0
        org.barrio = base_url + links[0]
        org.save()
    # NOTE(review): source formatting was mangled, so it is ambiguous whether
    # this sleep was inside the `if`; kept unconditional as a rate-limit.
    sleep(wait)
|
||||
|
||||
|
||||
if len(sys.argv) >= 2:
    # run_env argument (e.g. which environment's browser config to use).
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    # Bug fix: the original called br.close() here, but `br` is only bound
    # in the branch above, so this path raised NameError instead of exiting
    # cleanly with the "No run_env" message.
    print("No run_env")
    quit()

# All organizations; the geocoding loop below slices this queryset.
orgs = Organization.objects.all()
|
||||
|
||||
|
||||
# for org in orgs:
|
||||
# try:
|
||||
# if len(org.city) > 0:
|
||||
# msg = org.name.split(" ") + org.city.split(" ")
|
||||
# site = "https://www.google.com/search?q=google+maps+" + "+".join(msg)
|
||||
# print(site)
|
||||
# getSite(br, site, org)
|
||||
# except Exception as e:
|
||||
# ppr(org)
|
||||
# print(e)
|
||||
|
||||
|
||||
# Visit each org's stored Maps link and pull lat/lng out of the redirected
# Google Maps URL (".../@<lat>,<lng>,..."). Starts at index 5 — presumably
# to resume a previously interrupted run; TODO confirm and parameterize.
for org in orgs[5:]:
    try:
        br.get(org.barrio)
        org.gmap_link = org.barrio
        sleep(5)  # give Maps time to redirect to the coordinate URL
        current_url = br.current_url
        match = re.search(r'@(-?\d+\.\d+),(-?\d+\.\d+)', current_url)
        if match is None:
            # Explicitly skip URLs with no "@lat,lng" segment instead of
            # letting match[1] raise into the broad except below.
            print("No coordinates in URL:", current_url)
            continue
        # Group 1 is latitude, group 2 is longitude — equivalent to the old
        # match[0].split(",")[1] for longitude, but far clearer.
        print(match[1], match[2])
        org.latitude = match[1].strip()
        org.longitude = match[2].strip()
        org.save()
    except Exception as e:
        ppr(org)
        print(e)

br.close()
|
||||
@@ -44,7 +44,7 @@ def get_events(ps, event_type):
|
||||
try:
|
||||
event['support'] = c.xpath('.//p[@class="fs-12 supporting-talent"]/text()')[0]
|
||||
except:
|
||||
pass
|
||||
continue
|
||||
event['venue'] = c.xpath('.//p[@class="fs-12 venue"]/text()')[0]
|
||||
event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
|
||||
event['link'] = venue.website
|
||||
|
||||
@@ -49,31 +49,33 @@ def getLinks(br, url, links):
|
||||
div = br.find_element(By.XPATH, ".//*/div[@class='feed-pagination flexify']/a")
|
||||
div.click()
|
||||
sleep(2)
|
||||
br.execute_script("window.scrollTo(0, window.scrollY + 1100)")
|
||||
br.execute_script("window.scrollTo(0, window.scrollY + 1375)")
|
||||
sleep(2)
|
||||
except:
|
||||
x = 0
|
||||
ps = html.fromstring(br.page_source)
|
||||
newLinks = []
|
||||
# newlinks = ps.xpath('.//*/div[@class="e-con-inner"]/*/a/@href')
|
||||
events = ps.xpath('.//*/div[@class="e-con-inner"]')
|
||||
events = ps.xpath('.//*/div[@class="ts-preview"]')
|
||||
for event in events:
|
||||
ev = {}
|
||||
try:
|
||||
ev['link'] = event.xpath('.//*/a/@href')[0]
|
||||
ev['link'] = event.xpath('.//*/h3/a/@href')[0]
|
||||
ev['title'] = event.xpath('.//*/h3/a/text()')[0]
|
||||
ev['venue'] = event.xpath('.//*/ul/li/a/text()')[-1:][0].replace('\n', '').replace('\t', '')
|
||||
# e['venue'] = event.xpath('.//*/ul/li/a/text()')[-1:][0]
|
||||
ev['venueLink'] = event.xpath('.//*/ul/li/a/@href')[1]
|
||||
ev['venue'] = event.xpath('.//*/ul/li[@class="elementor-repeater-item-46edd7d flexify ts-action"]/a[@class="ts-action-con"]/text()')[-1:][0].replace('\n', '').replace('\t', '')
|
||||
ev['venueLink'] = event.xpath('.//*/ul/li[@class="elementor-repeater-item-46edd7d flexify ts-action"]/a[@class="ts-action-con"]/@href')[-1:][0]
|
||||
# ev['venueLink'] = event.xpath('.//*/ul/li/a/@href')[1]
|
||||
label= event.xpath('.//*/li[@class="elementor-repeater-item-46edd7d flexify ts-action"]/div/text()')
|
||||
ev['label'] = ''.join([x.replace('\t', '').replace('\n', '') for x in label]).strip()
|
||||
newLinks.append(ev)
|
||||
# ppr(ev)
|
||||
except Exception as e:
|
||||
print("Error: ", ev, e)
|
||||
|
||||
links = links + newLinks
|
||||
return links
|
||||
|
||||
|
||||
if len(sys.argv) >= 2:
|
||||
arg1 = sys.argv[1]
|
||||
br = digitools.getBrowser(arg1)
|
||||
@@ -89,11 +91,15 @@ for url in urls:
|
||||
allLinks = getLinks(br, url, allLinks)
|
||||
|
||||
totalLinks = list({v['title']:v for v in allLinks}.values())
|
||||
ppr(totalLinks)
|
||||
ppr(len(totalLinks))
|
||||
# sortedlinks = allLinks.sort()
|
||||
# ppr(sortedlinks)
|
||||
|
||||
for event in totalLinks:
|
||||
# quit()
|
||||
|
||||
|
||||
for event in totalLinks[175:250]:
|
||||
br.get(event['link'])
|
||||
sleep(1)
|
||||
ps = html.fromstring(br.page_source)
|
||||
@@ -156,6 +162,10 @@ for d in new_data:
|
||||
|
||||
cal = Calendar.objects.get(shortcode='mde')
|
||||
|
||||
print("run paisa")
|
||||
|
||||
ppr(paisa)
|
||||
|
||||
for d in paisa:
|
||||
try:
|
||||
nvenue, created = Organization.objects.get_or_create(
|
||||
@@ -170,17 +180,20 @@ for d in paisa:
|
||||
nvenue.address_complete = d['address']
|
||||
nvenue.save()
|
||||
|
||||
new_event, created = Event.objects.update_or_create(
|
||||
event_type = d['category'],
|
||||
show_title = d['title'],
|
||||
show_link = d['link'],
|
||||
show_date = d['dateStamp'],
|
||||
show_day = d['dateStamp'],
|
||||
scraper = scraper,
|
||||
venue = nvenue
|
||||
)
|
||||
new_event.calendar.add(cal)
|
||||
new_event.save()
|
||||
# print("Success:", new_event)
|
||||
try:
|
||||
new_event, created = Event.objects.update_or_create(
|
||||
event_type = d['category'],
|
||||
show_title = d['title'],
|
||||
show_link = d['link'],
|
||||
show_date = d['dateStamp'],
|
||||
scraper = scraper,
|
||||
venue = nvenue
|
||||
)
|
||||
new_event.calendar.add(cal)
|
||||
new_event.save()
|
||||
print(new_event, created, new_event.scraper)
|
||||
# print("Success:", new_event)
|
||||
except Execption as e:
|
||||
print(e)
|
||||
|
||||
digitools.updateScraper(scraper, item_count_start)
|
||||
@@ -81,11 +81,11 @@ for event in events:
|
||||
show_title = event["show_title"],
|
||||
show_link = event["link"],
|
||||
show_date = event["date_time"],
|
||||
show_day = event["date_time"],
|
||||
guests = " ".join(event["subtitle"]),
|
||||
venue = venue
|
||||
)
|
||||
digitools.add_calendar(new_event, 'msp')
|
||||
ppr(new_event)
|
||||
except Exception as e:
|
||||
print("oops ", e, "\n\n", "Scraper:", scraper)
|
||||
|
||||
|
||||
@@ -55,7 +55,6 @@ def get_events(ps, event_type):
|
||||
show_title = event['title'],
|
||||
show_link = event['link'],
|
||||
show_date = datetime.strptime(event['date'] + tz_str, DATETIME_FORMAT),
|
||||
show_day = datetime.strptime(event['date'] + tz_str, DATETIME_FORMAT),
|
||||
more_details = event["details"],
|
||||
venue = venue
|
||||
)
|
||||
@@ -71,7 +70,6 @@ def get_events(ps, event_type):
|
||||
show_title = event['title'],
|
||||
show_link = event['link'],
|
||||
show_date = datetime.strptime(event['date'] + tz_str, DATETIME_FORMAT),
|
||||
show_day = datetime.strptime(event['date'] + tz_str, DATETIME_FORMAT),
|
||||
more_details = event["details"],
|
||||
venue = venue
|
||||
)
|
||||
|
||||
@@ -26,8 +26,8 @@ venue, created = Organization.objects.get_or_create(
|
||||
scraper,item_count_start, virtcal = digitools.getScraper(venue, venue.website, 'msp')
|
||||
|
||||
tz_str = " -0600 UTC"
|
||||
DATETIME_FORMAT = '%a %B %d @ %I:%M %p %Y %z %Z'
|
||||
DATETIME_FORMAT_2 = '%a %B %d, %Y @ %I:%M %p %z %Z'
|
||||
DATETIME_FORMAT = '%a %B %d @ %I:%M %p %Y %z %Z'
|
||||
DATETIME_FORMAT_2 = '%a %B %d @ %I:%M %p %Y %z %Z'
|
||||
calendar_url = 'https://noboolpresents.com/venues/uptown-vfw/'
|
||||
current_year = str(datetime.now().year)
|
||||
|
||||
@@ -50,32 +50,33 @@ def getEvents(br):
|
||||
title = event.xpath('.//*/h2[@class="alt-font"]/a/text()')[0].replace("\n", "").replace("\t", "")
|
||||
link = event.xpath('.//*/h2[@class="alt-font"]/a/@href')[0]
|
||||
try:
|
||||
print("First Try", dateTime)
|
||||
new_event, created = Event.objects.update_or_create(
|
||||
scraper = scraper,
|
||||
event_type = 'Mu',
|
||||
show_title = title,
|
||||
show_link = link,
|
||||
show_date = datetime.strptime(dateTime + tz_str, DATETIME_FORMAT),
|
||||
show_day = datetime.strptime(dateTime + tz_str, DATETIME_FORMAT),
|
||||
venue = venue
|
||||
)
|
||||
digitools.add_calendar(new_event, 'msp')
|
||||
scraper.items+=1
|
||||
except Exception as e:
|
||||
try:
|
||||
print("New Try", dateTime)
|
||||
new_event, created = Event.objects.update_or_create(
|
||||
scraper = scraper,
|
||||
event_type = 'Mu',
|
||||
show_title = title,
|
||||
show_link = link,
|
||||
show_date = datetime.strptime(dateTime[:-4].strip() + tz_str, DATETIME_FORMAT_2),
|
||||
show_day = datetime.strptime(dateTime[:-4].strip() + tz_str, DATETIME_FORMAT_2),
|
||||
show_date = datetime.strptime(dateTime.strip() + tz_str, DATETIME_FORMAT_2),
|
||||
venue = venue
|
||||
)
|
||||
digitools.add_calendar(new_event, 'msp')
|
||||
scraper.items+=1
|
||||
except Exception as e:
|
||||
print("oops", e, dateTime[:-4])
|
||||
print("oops", e)
|
||||
ppr(event)
|
||||
|
||||
getEvents(br)
|
||||
br.find_element(By.XPATH, './/*/li[@class="tribe-events-c-nav__list-item tribe-events-c-nav__list-item--next"]/a').click()
|
||||
|
||||
Reference in New Issue
Block a user