From 04d2df2adc5357cbddef2ea5496f7bf3453ba282 Mon Sep 17 00:00:00 2001
From: David Przybilla
Date: Fri, 11 Mar 2016 17:48:52 +0000
Subject: [PATCH] extrayendo contratante
---
contra/contract.py | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/contra/contract.py b/contra/contract.py
index 3e19aae..a76c995 100644
--- a/contra/contract.py
+++ b/contra/contract.py
@@ -56,11 +56,18 @@ def extract_name(td_tag):
return {"name": name, "url" : url, "description": description, "publication_date": publication_date}
return None
+ def extract_contractor(self):  # Return the stripped text of the last ".subtitulos a" match, or "" when nothing matches.
+     matches = CSSSelector(".subtitulos a")(self.parsed_content)
+     contractor = ""
+     for match in matches:  # every later match overwrites the earlier one, so the last one wins
+         contractor = (match.text or "").strip()  # .text may be None for a tag with no leading text; avoid AttributeError
+     return contractor
def parse(self):
contract_representation = dict()
contract_representation['documents'] = list()
+ contract_representation['contratante'] = self.extract_contractor()
tr_tags = CSSSelector("tr")(self.parsed_content)
for tr_tag in tr_tags:
@@ -78,6 +85,7 @@ def parse(self):
document = self.extract_doc(td_tags)
if document:
contract_representation['documents'].append(document)
+
return contract_representation
def parse_contract_page(page_file):