From f1759aed6775df23257a9701e8ea352bd23da543 Mon Sep 17 00:00:00 2001 From: Zilin Du Date: Tue, 11 Mar 2025 20:14:12 -0700 Subject: [PATCH 1/3] add airlines field in the proto so that we can search flights for given airlines. --- fast_flights/flights.proto | 1 + fast_flights/flights_impl.py | 8 +++++- fast_flights/flights_pb2.py | 49 ++++++++++++++++++++++-------------- 3 files changed, 38 insertions(+), 20 deletions(-) diff --git a/fast_flights/flights.proto b/fast_flights/flights.proto index b65a93ac..a939112f 100644 --- a/fast_flights/flights.proto +++ b/fast_flights/flights.proto @@ -6,6 +6,7 @@ message Airport { message FlightData { string date = 2; + repeated string airlines = 6; Airport from_flight = 13; Airport to_flight = 14; optional int32 max_stops = 5; diff --git a/fast_flights/flights_impl.py b/fast_flights/flights_impl.py index ef7e84f5..8e462841 100644 --- a/fast_flights/flights_impl.py +++ b/fast_flights/flights_impl.py @@ -17,13 +17,15 @@ class FlightData: date (str): Date. from_airport (str): Departure (airport). Where from? to_airport (str): Arrival (airport). Where to? + airlines (list(str), optional): A list of airlines. Default is None. max_stops (int, optional): Maximum number of stops. Default is None. 
""" - __slots__ = ("date", "from_airport", "to_airport", "max_stops") + __slots__ = ("date", "from_airport", "to_airport", "airlines", "max_stops") date: str from_airport: str to_airport: str + airlines: Optional[List[str]] max_stops: Optional[int] def __init__( @@ -32,6 +34,7 @@ def __init__( date: str, from_airport: Union[Airport, str], to_airport: Union[Airport, str], + airlines: Optional[List[str]] = None, max_stops: Optional[int] = None, ): self.date = date @@ -41,6 +44,7 @@ def __init__( self.to_airport = ( to_airport.value if isinstance(to_airport, Airport) else to_airport ) + self.airlines = airlines self.max_stops = max_stops def attach(self, info: PB.Info) -> None: # type: ignore @@ -48,6 +52,8 @@ def attach(self, info: PB.Info) -> None: # type: ignore data.date = self.date data.from_flight.airport = self.from_airport data.to_flight.airport = self.to_airport + if self.airlines is not None: + data.airlines.extend(self.airlines) if self.max_stops is not None: data.max_stops = self.max_stops diff --git a/fast_flights/flights_pb2.py b/fast_flights/flights_pb2.py index ff7fd3ef..d54b219f 100644 --- a/fast_flights/flights_pb2.py +++ b/fast_flights/flights_pb2.py @@ -1,11 +1,22 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
+# NO CHECKED-IN PROTOBUF GENCODE # source: flights.proto +# Protobuf Python Version: 5.29.3 """Generated protocol buffer code.""" -from google.protobuf.internal import builder as _builder from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import runtime_version as _runtime_version from google.protobuf import symbol_database as _symbol_database +from google.protobuf.internal import builder as _builder +_runtime_version.ValidateProtobufRuntimeVersion( + _runtime_version.Domain.PUBLIC, + 5, + 29, + 3, + '', + 'flights.proto' +) # @@protoc_insertion_point(imports) _sym_db = _symbol_database.Default() @@ -13,23 +24,23 @@ -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rflights.proto\"\x1a\n\x07\x41irport\x12\x0f\n\x07\x61irport\x18\x02 \x01(\t\"|\n\nFlightData\x12\x0c\n\x04\x64\x61te\x18\x02 \x01(\t\x12\x1d\n\x0b\x66rom_flight\x18\r \x01(\x0b\x32\x08.Airport\x12\x1b\n\tto_flight\x18\x0e \x01(\x0b\x32\x08.Airport\x12\x16\n\tmax_stops\x18\x05 \x01(\x05H\x00\x88\x01\x01\x42\x0c\n\n_max_stops\"k\n\x04Info\x12\x19\n\x04\x64\x61ta\x18\x03 \x03(\x0b\x32\x0b.FlightData\x12\x13\n\x04seat\x18\t \x01(\x0e\x32\x05.Seat\x12\x1e\n\npassengers\x18\x08 \x03(\x0e\x32\n.Passenger\x12\x13\n\x04trip\x18\x13 \x01(\x0e\x32\x05.Trip*S\n\x04Seat\x12\x10\n\x0cUNKNOWN_SEAT\x10\x00\x12\x0b\n\x07\x45\x43ONOMY\x10\x01\x12\x13\n\x0fPREMIUM_ECONOMY\x10\x02\x12\x0c\n\x08\x42USINESS\x10\x03\x12\t\n\x05\x46IRST\x10\x04*E\n\x04Trip\x12\x10\n\x0cUNKNOWN_TRIP\x10\x00\x12\x0e\n\nROUND_TRIP\x10\x01\x12\x0b\n\x07ONE_WAY\x10\x02\x12\x0e\n\nMULTI_CITY\x10\x03*_\n\tPassenger\x12\x15\n\x11UNKNOWN_PASSENGER\x10\x00\x12\t\n\x05\x41\x44ULT\x10\x01\x12\t\n\x05\x43HILD\x10\x02\x12\x12\n\x0eINFANT_IN_SEAT\x10\x03\x12\x11\n\rINFANT_ON_LAP\x10\x04\x62\x06proto3') - -_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals()) -_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'flights_pb2', globals()) -if 
_descriptor._USE_C_DESCRIPTORS == False: +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rflights.proto\"\x1a\n\x07\x41irport\x12\x0f\n\x07\x61irport\x18\x02 \x01(\t\"\x8e\x01\n\nFlightData\x12\x0c\n\x04\x64\x61te\x18\x02 \x01(\t\x12\x10\n\x08\x61irlines\x18\x06 \x03(\t\x12\x1d\n\x0b\x66rom_flight\x18\r \x01(\x0b\x32\x08.Airport\x12\x1b\n\tto_flight\x18\x0e \x01(\x0b\x32\x08.Airport\x12\x16\n\tmax_stops\x18\x05 \x01(\x05H\x00\x88\x01\x01\x42\x0c\n\n_max_stops\"k\n\x04Info\x12\x19\n\x04\x64\x61ta\x18\x03 \x03(\x0b\x32\x0b.FlightData\x12\x13\n\x04seat\x18\t \x01(\x0e\x32\x05.Seat\x12\x1e\n\npassengers\x18\x08 \x03(\x0e\x32\n.Passenger\x12\x13\n\x04trip\x18\x13 \x01(\x0e\x32\x05.Trip*S\n\x04Seat\x12\x10\n\x0cUNKNOWN_SEAT\x10\x00\x12\x0b\n\x07\x45\x43ONOMY\x10\x01\x12\x13\n\x0fPREMIUM_ECONOMY\x10\x02\x12\x0c\n\x08\x42USINESS\x10\x03\x12\t\n\x05\x46IRST\x10\x04*E\n\x04Trip\x12\x10\n\x0cUNKNOWN_TRIP\x10\x00\x12\x0e\n\nROUND_TRIP\x10\x01\x12\x0b\n\x07ONE_WAY\x10\x02\x12\x0e\n\nMULTI_CITY\x10\x03*_\n\tPassenger\x12\x15\n\x11UNKNOWN_PASSENGER\x10\x00\x12\t\n\x05\x41\x44ULT\x10\x01\x12\t\n\x05\x43HILD\x10\x02\x12\x12\n\x0eINFANT_IN_SEAT\x10\x03\x12\x11\n\rINFANT_ON_LAP\x10\x04\x62\x06proto3') - DESCRIPTOR._options = None - _SEAT._serialized_start=280 - _SEAT._serialized_end=363 - _TRIP._serialized_start=365 - _TRIP._serialized_end=434 - _PASSENGER._serialized_start=436 - _PASSENGER._serialized_end=531 - _AIRPORT._serialized_start=17 - _AIRPORT._serialized_end=43 - _FLIGHTDATA._serialized_start=45 - _FLIGHTDATA._serialized_end=169 - _INFO._serialized_start=171 - _INFO._serialized_end=278 +_globals = globals() +_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'flights_pb2', _globals) +if not _descriptor._USE_C_DESCRIPTORS: + DESCRIPTOR._loaded_options = None + _globals['_SEAT']._serialized_start=299 + _globals['_SEAT']._serialized_end=382 + _globals['_TRIP']._serialized_start=384 + 
_globals['_TRIP']._serialized_end=453 + _globals['_PASSENGER']._serialized_start=455 + _globals['_PASSENGER']._serialized_end=550 + _globals['_AIRPORT']._serialized_start=17 + _globals['_AIRPORT']._serialized_end=43 + _globals['_FLIGHTDATA']._serialized_start=46 + _globals['_FLIGHTDATA']._serialized_end=188 + _globals['_INFO']._serialized_start=190 + _globals['_INFO']._serialized_end=297 # @@protoc_insertion_point(module_scope) From bd05c7a1627075b617c3b0d7149f8e9479029044 Mon Sep 17 00:00:00 2001 From: Zilin Du Date: Tue, 11 Mar 2025 21:02:52 -0700 Subject: [PATCH 2/3] make from_airport/to_airport repeated so that it can search multiple airports. --- fast_flights/flights.proto | 4 ++-- fast_flights/flights_impl.py | 12 +++++++----- fast_flights/flights_pb2.py | 2 +- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/fast_flights/flights.proto b/fast_flights/flights.proto index a939112f..cfc272dd 100644 --- a/fast_flights/flights.proto +++ b/fast_flights/flights.proto @@ -7,8 +7,8 @@ message Airport { message FlightData { string date = 2; repeated string airlines = 6; - Airport from_flight = 13; - Airport to_flight = 14; + repeated Airport from_flight = 13; + repeated Airport to_flight = 14; optional int32 max_stops = 5; } diff --git a/fast_flights/flights_impl.py b/fast_flights/flights_impl.py index 8e462841..37ba4fc6 100644 --- a/fast_flights/flights_impl.py +++ b/fast_flights/flights_impl.py @@ -40,20 +40,22 @@ def __init__( self.date = date self.from_airport = ( from_airport.value if isinstance(from_airport, Airport) else from_airport - ) + ).split(',') self.to_airport = ( to_airport.value if isinstance(to_airport, Airport) else to_airport - ) + ).split(',') self.airlines = airlines self.max_stops = max_stops def attach(self, info: PB.Info) -> None: # type: ignore data = info.data.add() data.date = self.date - data.from_flight.airport = self.from_airport - data.to_flight.airport = self.to_airport + for from_airport in self.from_airport: + 
data.from_flight.add().airport = from_airport + for to_airport in self.to_airport: + data.to_flight.add().airport = to_airport if self.airlines is not None: - data.airlines.extend(self.airlines) + data.airlines[:] = self.airlines if self.max_stops is not None: data.max_stops = self.max_stops diff --git a/fast_flights/flights_pb2.py b/fast_flights/flights_pb2.py index d54b219f..47d93135 100644 --- a/fast_flights/flights_pb2.py +++ b/fast_flights/flights_pb2.py @@ -24,7 +24,7 @@ -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rflights.proto\"\x1a\n\x07\x41irport\x12\x0f\n\x07\x61irport\x18\x02 \x01(\t\"\x8e\x01\n\nFlightData\x12\x0c\n\x04\x64\x61te\x18\x02 \x01(\t\x12\x10\n\x08\x61irlines\x18\x06 \x03(\t\x12\x1d\n\x0b\x66rom_flight\x18\r \x01(\x0b\x32\x08.Airport\x12\x1b\n\tto_flight\x18\x0e \x01(\x0b\x32\x08.Airport\x12\x16\n\tmax_stops\x18\x05 \x01(\x05H\x00\x88\x01\x01\x42\x0c\n\n_max_stops\"k\n\x04Info\x12\x19\n\x04\x64\x61ta\x18\x03 \x03(\x0b\x32\x0b.FlightData\x12\x13\n\x04seat\x18\t \x01(\x0e\x32\x05.Seat\x12\x1e\n\npassengers\x18\x08 \x03(\x0e\x32\n.Passenger\x12\x13\n\x04trip\x18\x13 \x01(\x0e\x32\x05.Trip*S\n\x04Seat\x12\x10\n\x0cUNKNOWN_SEAT\x10\x00\x12\x0b\n\x07\x45\x43ONOMY\x10\x01\x12\x13\n\x0fPREMIUM_ECONOMY\x10\x02\x12\x0c\n\x08\x42USINESS\x10\x03\x12\t\n\x05\x46IRST\x10\x04*E\n\x04Trip\x12\x10\n\x0cUNKNOWN_TRIP\x10\x00\x12\x0e\n\nROUND_TRIP\x10\x01\x12\x0b\n\x07ONE_WAY\x10\x02\x12\x0e\n\nMULTI_CITY\x10\x03*_\n\tPassenger\x12\x15\n\x11UNKNOWN_PASSENGER\x10\x00\x12\t\n\x05\x41\x44ULT\x10\x01\x12\t\n\x05\x43HILD\x10\x02\x12\x12\n\x0eINFANT_IN_SEAT\x10\x03\x12\x11\n\rINFANT_ON_LAP\x10\x04\x62\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rflights.proto\"\x1a\n\x07\x41irport\x12\x0f\n\x07\x61irport\x18\x02 \x01(\t\"\x8e\x01\n\nFlightData\x12\x0c\n\x04\x64\x61te\x18\x02 \x01(\t\x12\x10\n\x08\x61irlines\x18\x06 \x03(\t\x12\x1d\n\x0b\x66rom_flight\x18\r \x03(\x0b\x32\x08.Airport\x12\x1b\n\tto_flight\x18\x0e 
\x03(\x0b\x32\x08.Airport\x12\x16\n\tmax_stops\x18\x05 \x01(\x05H\x00\x88\x01\x01\x42\x0c\n\n_max_stops\"k\n\x04Info\x12\x19\n\x04\x64\x61ta\x18\x03 \x03(\x0b\x32\x0b.FlightData\x12\x13\n\x04seat\x18\t \x01(\x0e\x32\x05.Seat\x12\x1e\n\npassengers\x18\x08 \x03(\x0e\x32\n.Passenger\x12\x13\n\x04trip\x18\x13 \x01(\x0e\x32\x05.Trip*S\n\x04Seat\x12\x10\n\x0cUNKNOWN_SEAT\x10\x00\x12\x0b\n\x07\x45\x43ONOMY\x10\x01\x12\x13\n\x0fPREMIUM_ECONOMY\x10\x02\x12\x0c\n\x08\x42USINESS\x10\x03\x12\t\n\x05\x46IRST\x10\x04*E\n\x04Trip\x12\x10\n\x0cUNKNOWN_TRIP\x10\x00\x12\x0e\n\nROUND_TRIP\x10\x01\x12\x0b\n\x07ONE_WAY\x10\x02\x12\x0e\n\nMULTI_CITY\x10\x03*_\n\tPassenger\x12\x15\n\x11UNKNOWN_PASSENGER\x10\x00\x12\t\n\x05\x41\x44ULT\x10\x01\x12\t\n\x05\x43HILD\x10\x02\x12\x12\n\x0eINFANT_IN_SEAT\x10\x03\x12\x11\n\rINFANT_ON_LAP\x10\x04\x62\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) From 885fa798a14dd85e1343322520ba46d3592aed1b Mon Sep 17 00:00:00 2001 From: Zilin Du Date: Tue, 11 Mar 2025 21:48:16 -0700 Subject: [PATCH 3/3] parse from_airport and to_airport from flight results. 
--- fast_flights/core.py | 10 ++++++++++ fast_flights/schema.py | 2 ++ 2 files changed, 12 insertions(+) diff --git a/fast_flights/core.py b/fast_flights/core.py index 9ab03bad..47f0a949 100644 --- a/fast_flights/core.py +++ b/fast_flights/core.py @@ -105,6 +105,14 @@ def safe(n: Optional[LexborNode]): name = safe(item.css_first("div.sSHqwe.tPgKwe.ogfYpf span")).text( strip=True ) + # from_airport - to_airport + from_to_airport_node = item.css("span.PTuQse.sSHqwe.tPgKwe.ogfYpf div.QylvBf span span span") + try: + from_airport = from_to_airport_node[0].text(strip=True) + to_airport = from_to_airport_node[1].text(strip=True) + except IndexError: + from_airport = "" + to_airport = "" # Get departure & arrival time dp_ar_node = item.css("span.mv1WYe div") @@ -141,6 +149,8 @@ def safe(n: Optional[LexborNode]): { "is_best": is_best_flight, "name": name, + "from_airport": from_airport, + "to_airport": to_airport, "departure": " ".join(departure_time.split()), "arrival": " ".join(arrival_time.split()), "arrival_time_ahead": time_ahead, diff --git a/fast_flights/schema.py b/fast_flights/schema.py index a5572d6d..9c178bc2 100644 --- a/fast_flights/schema.py +++ b/fast_flights/schema.py @@ -14,6 +14,8 @@ class Result: class Flight: is_best: bool name: str + from_airport: str + to_airport: str departure: str arrival: str arrival_time_ahead: str