Skip to content

Commit

Permalink
use unidecode to convert unicode to ascii
Browse files Browse the repository at this point in the history
  • Loading branch information
hbruch committed Nov 29, 2024
1 parent 59354a5 commit f6bd68f
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 10 deletions.
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ requests~=2.32.3
python-decouple~=3.8
websockets~=13.0
xmltodict~=0.13
unidecode~=1.3
12 changes: 2 additions & 10 deletions x2gbfs/providers/cambio.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from typing import Any

from x2gbfs.gbfs.base_provider import BaseProvider
from x2gbfs.util import get
from x2gbfs.util import get, unidecode_with_german_umlauts

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -123,14 +123,6 @@ def _extract_vehicle_types_available(self, elem: dict[str, Any]) -> list[dict[st
vehicle_classes_at_station = elem.get('vehicleClasses', [])
return [{'vehicle_type_id': vehicle_class['id'], 'count': 1} for vehicle_class in vehicle_classes_at_station]

def _replace_umlauts(self, string: str) -> str:
substitions = (('ü', 'ue'), ('ä', 'ae'), ('ö', 'oe'), ('é', 'e'), ('ß', 'ss'))

for orig, subst in substitions:
string = string.replace(orig, subst)

return string

def _extract_propulsion_type(self, elem: dict[str, str]) -> str:
"""
Guesses the propulsion type from vehicle name.
Expand All @@ -148,7 +140,7 @@ def _extract_station_rental_uris(self, elem: dict[str, str]) -> dict[str, str]:
"""
Guesses the propulsion type from vehicle name.
"""
station_name = self._replace_umlauts(elem['name'].lower())
station_name = unidecode_with_german_umlauts(elem['name'].lower())
station_url = f"https://www.cambio-carsharing.de/stationen/station/{station_name}-{elem['id']}"

return {
Expand Down
12 changes: 12 additions & 0 deletions x2gbfs/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,18 @@
from typing import Any, Dict, Generator, Optional, Tuple

import requests
from unidecode import unidecode

# Translation table (for str.translate) mapping German umlauts, lower and upper case,
# to their two-letter ASCII spellings.
GERMAN_UMLAUTS_TRANSLATIONS = str.maketrans(
    {
        'ä': 'ae',
        'ö': 'oe',
        'ü': 'ue',
        'Ä': 'Ae',
        'Ö': 'Oe',
        'Ü': 'Ue',
    }
)


def unidecode_with_german_umlauts(string: str) -> str:
    """
    Represents a non-ASCII Unicode string by its closest matching ASCII variant.

    As unidecode transliterates German umlauts incorrectly (see the FAQ section of
    https://pypi.org/project/Unidecode/), they are first replaced explicitly via
    GERMAN_UMLAUTS_TRANSLATIONS; the result is then passed to unidecode.
    """
    umlauts_replaced = string.translate(GERMAN_UMLAUTS_TRANSLATIONS)
    return unidecode(umlauts_replaced)


def timestamp_to_isoformat(utctimestamp: datetime):
Expand Down

0 comments on commit f6bd68f

Please sign in to comment.