dexter has uploaded this change for review. ( https://gerrit.osmocom.org/c/pysim/+/33941 )
Change subject: construct: add adapter Utf8Adapter to safely interpret utf8 text ......................................................................
construct: add adapter Utf8Adapter to safely interpret utf8 text
Uninitialized files, file records, or fields within a file or file record usually contain a string of 0xff bytes. This becomes a problem when the content is normally encoded/decoded as utf8 by the construct parser: the parser will throw an exception when it tries to decode the 0xff string as utf8. This is an especially serious problem in pySim-trace, where an exception stops the parser.
Let's fix this by interpreting a string of 0xff as an empty string.
Related: OS#6094 Change-Id: Id114096ccb8b7ff8fcc91e1ef3002526afa09cb7 --- M pySim/construct.py M pySim/ts_31_102.py M pySim/ts_31_103.py 3 files changed, 44 insertions(+), 10 deletions(-)
git pull ssh://gerrit.osmocom.org:29418/pysim refs/changes/41/33941/1
diff --git a/pySim/construct.py b/pySim/construct.py index ab44a63..af96b49 100644 --- a/pySim/construct.py +++ b/pySim/construct.py @@ -6,6 +6,7 @@ from construct.lib import integertypes from pySim.utils import b2h, h2b, swap_nibbles import gsm0338 +import codecs
"""Utility code related to the integration of the 'construct' declarative parser."""
@@ -34,6 +35,18 @@ def _encode(self, obj, context, path): return h2b(obj)
+class Utf8Adapter(Adapter): + """convert a bytes() type that contains utf8 encoded text to human readable text.""" + + def _decode(self, obj, context, path): + # In case the string contains only 0xff bytes we interpret it as an empty string + if obj == b'\xff' * len(obj): + return "" + return codecs.decode(obj, "utf-8") + + def _encode(self, obj, context, path): + return codecs.encode(obj, "utf-8") +
class BcdAdapter(Adapter): """convert a bytes() type to a string of BCD nibbles.""" diff --git a/pySim/ts_31_102.py b/pySim/ts_31_102.py index 5db3e88..dc674b2 100644 --- a/pySim/ts_31_102.py +++ b/pySim/ts_31_102.py @@ -872,7 +872,7 @@ class ePDGId(BER_TLV_IE, tag=0x80, nested=[]): _construct = Struct('type_of_ePDG_address'/Enum(Byte, FQDN=0, IPv4=1, IPv6=2), 'ePDG_address'/Switch(this.type_of_address, - {'FQDN': GreedyString("utf8"), + {'FQDN': Utf8Adapter(GreedyBytes), 'IPv4': HexAdapter(GreedyBytes), 'IPv6': HexAdapter(GreedyBytes)}))
@@ -951,15 +951,15 @@ class EF_SUPI_NAI(TransparentEF): class NetworkSpecificIdentifier(TLV_IE, tag=0x80): # RFC 7542 encoded as UTF-8 string - _construct = GreedyString("utf8") + _construct = Utf8Adapter(GreedyBytes)
class GlobalLineIdentifier(TLV_IE, tag=0x81): # TS 23.003 clause 28.16.2 - _construct = GreedyString("utf8") + _construct = Utf8Adapter(GreedyBytes)
class GlobalCableIdentifier(TLV_IE, tag=0x82): # TS 23.003 clause 28.15.2 - _construct = GreedyString("utf8") + _construct = Utf8Adapter(GreedyBytes)
class NAI_TLV_Collection(TLV_IE_Collection, nested=[NetworkSpecificIdentifier, GlobalLineIdentifier, GlobalCableIdentifier]): @@ -984,7 +984,7 @@ # TS 31.102 Section 4.4.11.13 class EF_TN3GPPSNN(TransparentEF): class ServingNetworkName(BER_TLV_IE, tag=0x80): - _construct = GreedyString("utf8") + _construct = Utf8Adapter(GreedyBytes) def __init__(self, fid='4f0c', sfid=0x0c, name='EF.TN3GPPSNN', desc='Trusted non-3GPP Serving network names list', **kwargs): super().__init__(fid, sfid=sfid, name=name, desc=desc, **kwargs) diff --git a/pySim/ts_31_103.py b/pySim/ts_31_103.py index 5f374cb..19ade2b 100644 --- a/pySim/ts_31_103.py +++ b/pySim/ts_31_103.py @@ -30,6 +30,7 @@ from pySim.ts_31_102_telecom import EF_UServiceTable import pySim.ts_102_221 from pySim.ts_102_221 import EF_ARR +from pySim.construct import *
# Mapping between ISIM Service Number and its description EF_IST_map = { @@ -62,8 +63,9 @@ ( '803137333830303630303030303031303140696d732e6d6e633030302e6d63633733382e336770706e6574776f726b2e6f7267', { "nai": "738006000000101@ims.mnc000.mcc738.3gppnetwork.org" } ), ] + class nai(BER_TLV_IE, tag=0x80): - _construct = GreedyString("utf8") + _construct = Utf8Adapter(GreedyBytes)
def __init__(self, fid='6f02', sfid=0x02, name='EF.IMPI', desc='IMS private user identity', **kwargs): super().__init__(fid=fid, sfid=sfid, name=name, desc=desc, **kwargs) @@ -76,7 +78,7 @@ { "domain": "ims.mnc000.mcc738.3gppnetwork.org" } ), ] class domain(BER_TLV_IE, tag=0x80): - _construct = GreedyString("utf8") + _construct = Utf8Adapter(GreedyBytes)
def __init__(self, fid='6f03', sfid=0x05, name='EF.DOMAIN', desc='Home Network Domain Name', **kwargs): super().__init__(fid=fid, sfid=sfid, name=name, desc=desc, **kwargs) @@ -89,7 +91,7 @@ { "impu": "sip:738006000000101@ims.mnc000.mcc738.3gppnetwork.org" } ), ] class impu(BER_TLV_IE, tag=0x80): - _construct = GreedyString("utf8") + _construct = Utf8Adapter(GreedyBytes)
def __init__(self, fid='6f04', sfid=0x04, name='EF.IMPU', desc='IMS public user identity', **kwargs): super().__init__(fid=fid, sfid=sfid, name=name, desc=desc, **kwargs) @@ -165,7 +167,7 @@ # TS 31.103 Section 4.2.16 class EF_UICCIARI(LinFixedEF): class iari(BER_TLV_IE, tag=0x80): - _construct = GreedyString("utf8") + _construct = Utf8Adapter(GreedyBytes)
def __init__(self, fid='6fe7', sfid=None, name='EF.UICCIARI', desc='UICC IARI', **kwargs): super().__init__(fid=fid, sfid=sfid, name=name, desc=desc, **kwargs) @@ -232,7 +234,7 @@ # TS 31.103 Section 4.2.20 class EF_WebRTCURI(TransparentEF): class uri(BER_TLV_IE, tag=0x80): - _construct = GreedyString("utf8") + _construct = Utf8Adapter(GreedyBytes)
def __init__(self, fid='6ffa', sfid=None, name='EF.WebRTCURI', desc='WebRTC URI', **kwargs): super().__init__(fid=fid, sfid=sfid, name=name, desc=desc, **kwargs)