From 04d2df2adc5357cbddef2ea5496f7bf3453ba282 Mon Sep 17 00:00:00 2001
From: David Przybilla
Date: Fri, 11 Mar 2016 17:48:52 +0000
Subject: [PATCH] extrayendo contratante

---
Reviewer notes (this section is discarded by git-am):
 * Adds extract_contractor(), which returns the stripped text of the
   last ".subtitulos a" match, or "" when the selector matches nothing.
 * parse() now stores that value under the 'contratante' key, and gains
   a blank line before its final return.
 * match.text is guarded with `or ""` so that an anchor whose text is
   None no longer raises AttributeError (assumes the matches are lxml
   elements; confirm against the imports in contra/contract.py).
 * Line breaks, blank context lines and indentation were reconstructed
   from a whitespace-collapsed copy of this patch; verify them against
   contra/contract.py before applying.

 contra/contract.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/contra/contract.py b/contra/contract.py
index 3e19aae..a76c995 100644
--- a/contra/contract.py
+++ b/contra/contract.py
@@ -56,11 +56,18 @@ def extract_name(td_tag):
             return {"name": name, "url" : url, "description": description, "publication_date": publication_date}
         return None
 
+    def extract_contractor(self):
+        matches = CSSSelector(".subtitulos a")(self.parsed_content)
+        contractor = ""
+        for match in matches:
+            contractor = (match.text or "").strip()  # text may be None
+        return contractor
 
     def parse(self):
 
         contract_representation = dict()
         contract_representation['documents'] = list()
+        contract_representation['contratante'] = self.extract_contractor()
 
         tr_tags = CSSSelector("tr")(self.parsed_content)
         for tr_tag in tr_tags:
@@ -78,6 +85,7 @@ def parse(self):
                 document = self.extract_doc(td_tags)
                 if document:
                     contract_representation['documents'].append(document)
+
         return contract_representation
 
 def parse_contract_page(page_file):