DoD Contracts, Maps and Austria

This commit is contained in:
2026-02-19 22:49:47 -05:00
parent 3c4a41ae2c
commit 3eda7647e9
13 changed files with 469 additions and 38 deletions

View File

@@ -49,31 +49,33 @@ def getLinks(br, url, links):
div = br.find_element(By.XPATH, ".//*/div[@class='feed-pagination flexify']/a")
div.click()
sleep(2)
br.execute_script("window.scrollTo(0, window.scrollY + 1100)")
br.execute_script("window.scrollTo(0, window.scrollY + 1375)")
sleep(2)
except:
x = 0
ps = html.fromstring(br.page_source)
newLinks = []
# newlinks = ps.xpath('.//*/div[@class="e-con-inner"]/*/a/@href')
events = ps.xpath('.//*/div[@class="e-con-inner"]')
events = ps.xpath('.//*/div[@class="ts-preview"]')
for event in events:
ev = {}
try:
ev['link'] = event.xpath('.//*/a/@href')[0]
ev['link'] = event.xpath('.//*/h3/a/@href')[0]
ev['title'] = event.xpath('.//*/h3/a/text()')[0]
ev['venue'] = event.xpath('.//*/ul/li/a/text()')[-1:][0].replace('\n', '').replace('\t', '')
# e['venue'] = event.xpath('.//*/ul/li/a/text()')[-1:][0]
ev['venueLink'] = event.xpath('.//*/ul/li/a/@href')[1]
ev['venue'] = event.xpath('.//*/ul/li[@class="elementor-repeater-item-46edd7d flexify ts-action"]/a[@class="ts-action-con"]/text()')[-1:][0].replace('\n', '').replace('\t', '')
ev['venueLink'] = event.xpath('.//*/ul/li[@class="elementor-repeater-item-46edd7d flexify ts-action"]/a[@class="ts-action-con"]/@href')[-1:][0]
# ev['venueLink'] = event.xpath('.//*/ul/li/a/@href')[1]
label= event.xpath('.//*/li[@class="elementor-repeater-item-46edd7d flexify ts-action"]/div/text()')
ev['label'] = ''.join([x.replace('\t', '').replace('\n', '') for x in label]).strip()
newLinks.append(ev)
# ppr(ev)
except Exception as e:
print("Error: ", ev, e)
links = links + newLinks
return links
if len(sys.argv) >= 2:
arg1 = sys.argv[1]
br = digitools.getBrowser(arg1)
@@ -89,11 +91,15 @@ for url in urls:
allLinks = getLinks(br, url, allLinks)
totalLinks = list({v['title']:v for v in allLinks}.values())
ppr(totalLinks)
ppr(len(totalLinks))
# sortedlinks = allLinks.sort()
# ppr(sortedlinks)
for event in totalLinks:
# quit()
for event in totalLinks[175:250]:
br.get(event['link'])
sleep(1)
ps = html.fromstring(br.page_source)
@@ -156,6 +162,10 @@ for d in new_data:
cal = Calendar.objects.get(shortcode='mde')
print("run paisa")
ppr(paisa)
for d in paisa:
try:
nvenue, created = Organization.objects.get_or_create(
@@ -170,17 +180,20 @@ for d in paisa:
nvenue.address_complete = d['address']
nvenue.save()
new_event, created = Event.objects.update_or_create(
event_type = d['category'],
show_title = d['title'],
show_link = d['link'],
show_date = d['dateStamp'],
show_day = d['dateStamp'],
scraper = scraper,
venue = nvenue
)
new_event.calendar.add(cal)
new_event.save()
# print("Success:", new_event)
try:
new_event, created = Event.objects.update_or_create(
event_type = d['category'],
show_title = d['title'],
show_link = d['link'],
show_date = d['dateStamp'],
scraper = scraper,
venue = nvenue
)
new_event.calendar.add(cal)
new_event.save()
print(new_event, created, new_event.scraper)
# print("Success:", new_event)
except Exception as e:
print(e)
digitools.updateScraper(scraper, item_count_start)