-
Notifications
You must be signed in to change notification settings - Fork 39
Bugfix/non integer taxon keys #203
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
c8b7c19
dcbdb64
db1d3d2
4d4be86
4d04c44
72b739d
4734dcf
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -22,6 +22,47 @@ | |||||
| ) | ||||||
|
|
||||||
|
|
||||||
| # Keys that should have integer values in the GBIF API | ||||||
| # Taxonomic keys from GBIF's backbone taxonomy that should be integers | ||||||
| # Note: Other keys like DATASET_KEY, NETWORK_KEY are UUIDs and should remain strings | ||||||
| taxonomic_integer_keys = { | ||||||
| "TAXON_KEY", "SPECIES_KEY", "GENUS_KEY", "FAMILY_KEY", | ||||||
| "ORDER_KEY", "CLASS_KEY", "PHYLUM_KEY", "KINGDOM_KEY", | ||||||
| "SUBGENUS_KEY", "YEAR", "MONTH" | ||||||
| } | ||||||
|
|
||||||
| def _convert_to_int(value, key_name): | ||||||
| """ | ||||||
| Convert a value to integer, handling float-like strings (e.g., "5785887.0"). | ||||||
|
|
||||||
| :param value: The value to convert | ||||||
| :param key_name: The name of the key (for logging purposes) | ||||||
| :return: Integer value if conversion successful, otherwise raises ValueError | ||||||
| """ | ||||||
| if not isinstance(value, str): | ||||||
| return int(value) | ||||||
|
|
||||||
| try: | ||||||
| # First try direct integer conversion | ||||||
| return int(value) | ||||||
| except ValueError: | ||||||
| # If that fails, try converting via float (handles "5785887.0") | ||||||
| try: | ||||||
| float_val = float(value) | ||||||
| int_val = int(float_val) | ||||||
| # Check if it's actually an integer value | ||||||
| if float_val != int_val: | ||||||
| logging.warning( | ||||||
| f"Converting non-integer value {value} to {int_val} for {key_name}. " | ||||||
| f"Fractional part {float_val - int_val} will be lost." | ||||||
| ) | ||||||
| return int_val | ||||||
| except ValueError: | ||||||
| raise ValueError( | ||||||
| f"Cannot convert value '{value}' to integer for {key_name}. " | ||||||
| f"Expected a numeric value." | ||||||
| ) | ||||||
|
|
||||||
| # how to parse arguments/predicates | ||||||
| def _parse_args(x): | ||||||
| x = x.replace("'", '"') | ||||||
|
|
@@ -45,13 +86,27 @@ def _parse_args(x): | |||||
| "error: in predicate has to be associated with a list in square brackets (for example [1, 2, 3])" | ||||||
| ) | ||||||
| else: | ||||||
| return {"type": "in", "key": key, "values": json.loads(value_list.group(0))} | ||||||
| values = json.loads(value_list.group(0)) | ||||||
| # Convert taxonomic key values to integers | ||||||
| if key in taxonomic_integer_keys: | ||||||
| converted_values = [] | ||||||
| for v in values: | ||||||
| try: | ||||||
| converted_values.append(_convert_to_int(v, key)) | ||||||
| except ValueError as e: | ||||||
| raise ValueError(f"Error in 'in' predicate for {key}: {str(e)}") | ||||||
|
||||||
| raise ValueError(f"Error in 'in' predicate for {key}: {str(e)}") | |
| raise ValueError(f"Error in 'in' predicate for {key}: {str(e)}") from e |
Copilot
AI
Mar 25, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The integer coercion for in predicates will raise ValueError for common “float-like” inputs such as "5785887.0" (the reported issue) because str.isdigit() is false and the else branch still does int(v) without any guarding. Consider using a single safe coercion routine that accepts int, float values with .is_integer(), and strings matching ^\d+(\.0+)?$ (or Decimal) and otherwise leaves the value unchanged or raises a clearer error.
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -196,3 +196,170 @@ def test_geometry_predicate(self): | |||||
| payload["predicate"]["predicates"][0], | ||||||
| {"type": "within", "geometry": "POLYGON((-82.7 36.9, -85.0 35.6, -81.0 33.5, -79.4 36.3, -79.4 36.3, -82.7 36.9))"}, | ||||||
| ) | ||||||
|
|
||||||
| def test_taxonkey_integer_conversion(self): | ||||||
| """Test that taxonKey values are converted to integers (issue #179)""" | ||||||
| dl_key, payload = download( | ||||||
| "taxonKey = 3119195", | ||||||
| user="dummy", | ||||||
| email="dummy", | ||||||
| pwd="dummy", | ||||||
| ) | ||||||
| # taxonKey value should be an integer, not a string | ||||||
| self.assertDictEqual( | ||||||
| payload["predicate"]["predicates"][0], | ||||||
| {"key": "TAXON_KEY", "type": "equals", "value": 3119195}, | ||||||
| ) | ||||||
|
Comment on lines
+200
to
+212
|
||||||
| self.assertIsInstance(payload["predicate"]["predicates"][0]["value"], int) | ||||||
|
|
||||||
| def test_taxonkey_in_predicate_integer_conversion(self): | ||||||
| """Test that taxonKey values in 'in' predicate are converted to integers""" | ||||||
| dl_key, payload = download( | ||||||
| 'taxonKey in ["2387246", "2399391", "2364604"]', | ||||||
| user="dummy", | ||||||
| email="dummy", | ||||||
| pwd="dummy", | ||||||
| ) | ||||||
| # taxonKey values should be integers in the list | ||||||
| self.assertDictEqual( | ||||||
| payload["predicate"]["predicates"][0], | ||||||
| {"key": "TAXON_KEY", "type": "in", "values": [2387246, 2399391, 2364604]}, | ||||||
| ) | ||||||
| for val in payload["predicate"]["predicates"][0]["values"]: | ||||||
| self.assertIsInstance(val, int) | ||||||
|
|
||||||
| def test_year_integer_conversion(self): | ||||||
| """Test that year values are converted to integers""" | ||||||
| dl_key, payload = download( | ||||||
| "year = 2023", | ||||||
| user="dummy", | ||||||
| email="dummy", | ||||||
| pwd="dummy", | ||||||
| ) | ||||||
| self.assertDictEqual( | ||||||
| payload["predicate"]["predicates"][0], | ||||||
| {"key": "YEAR", "type": "equals", "value": 2023}, | ||||||
| ) | ||||||
| self.assertIsInstance(payload["predicate"]["predicates"][0]["value"], int) | ||||||
|
|
||||||
| def test_depth_numeric_conversion(self): | ||||||
|
||||||
| def test_depth_numeric_conversion(self): | |
| def test_depth_numeric_like_value_remains_string(self): |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The comments/variable name imply these are strictly taxonomic backbone keys, but the set also includes YEAR and MONTH. To avoid confusion for future maintenance, either rename this set to something like integer_predicate_keys (or similar) or adjust the comment to match its actual contents.