laforge has uploaded this change for review.

View Change

pySim.esim.saip: Implement optimized file content encoding

Make sure we make use of the fill pattern when encoding file contents:
Only encode the differences to the fill pattern of the file, in order
to reduce the profile download size.

Change-Id: I61e4a5e04beba5c9092979fc546292d5ef3d7aad
---
M pySim/esim/saip/__init__.py
M tests/unittests/test_esim_saip.py
2 files changed, 81 insertions(+), 6 deletions(-)

git pull ssh://gerrit.osmocom.org:29418/pysim refs/changes/14/38014/1
diff --git a/pySim/esim/saip/__init__.py b/pySim/esim/saip/__init__.py
index 658810a..505ea30 100644
--- a/pySim/esim/saip/__init__.py
+++ b/pySim/esim/saip/__init__.py
@@ -20,6 +20,8 @@
import io
from typing import Tuple, List, Optional, Dict, Union
from collections import OrderedDict
+from difflib import SequenceMatcher, Match
+
import asn1tools
from osmocom.utils import b2h, h2b, Hexstr
from osmocom.tlv import BER_TLV_IE, bertlv_parse_tag, bertlv_parse_len
@@ -40,6 +42,29 @@

logger = logging.getLogger(__name__)

class NonMatch(Match):
    """Representing a contiguous non-matching block of data; the opposite of difflib.Match"""

    @classmethod
    def from_matchlist(cls, l: List[Match], size: int) -> List['NonMatch']:
        """Build a list of non-matching blocks of data from its inverse (list of matching blocks).

        The caller must ensure that the input list is ordered, non-overlapping and only contains
        matches at equal offsets in a and b.

        Args:
            l: list of matching blocks, each with identical 'a' and 'b' offsets.
            size: total length of the compared data; any range between the end of the last
                  match and this length is reported as a final non-matching block.
        Returns:
            List of NonMatch instances (with a == b) covering every range in [0, size) that
            is not covered by one of the given matches.
        Raises:
            ValueError: if a match has differing offsets in 'a' and 'b'.
        """
        res = []
        # offset of the first byte not yet accounted for by a match or non-match
        cur = 0
        for match in l:
            if match.a != match.b:
                # must be raised (not returned), otherwise the caller gets an exception
                # object where it expects a list
                raise ValueError('only works for equal-offset matches')
            assert match.a >= cur, 'match list must be ordered and non-overlapping'
            nm_len = match.a - cur
            if nm_len > 0:
                # there's no point in generating zero-length non-matching sections
                res.append(cls(a=cur, b=cur, size=nm_len))
            cur = match.a + match.size
        if size > cur:
            # trailing non-matching section after the last match
            res.append(cls(a=cur, b=cur, size=size - cur))

        return res
+
class Naa:
"""A class defining a Network Access Application (NAA)."""
name = None
@@ -359,12 +384,33 @@
return ValueError("Unknown key '%s' in tuple list" % k)
return stream.getvalue()

- def file_content_to_tuples(self) -> List[Tuple]:
- # FIXME: simplistic approach. needs optimization. We should first check if the content
- # matches the expanded default value from the template. If it does, return empty list.
- # Next, we should compute the diff between the default value and self.body, and encode
- # that as a sequence of fillFileOffset and fillFileContent tuples.
- return [('fillFileContent', self.body)]
def file_content_to_tuples(self, optimize: bool = True) -> List[Tuple]:
    """Encode the file body as a list of fillFileOffset / fillFileContent tuples.

    Args:
        optimize: if False, encode the complete body as one single 'fillFileContent' tuple.
                  If True, encode only those parts of the body that differ from the default
                  content (template default value if available, 0xff padding otherwise).
    Returns:
        List of ('fillFileOffset', int) and ('fillFileContent', bytes) tuples.  Each offset
        is relative to the end of the previously emitted content (or to the start of the
        file for the first one).  An empty list is returned if the body is identical to
        the default content expanded from the template.
    """
    if not optimize:
        # simplistic approach: encode the full file, ignoring the template/default
        return [('fillFileContent', self.body)]

    # Try to 'compress' the file body, based on the default file contents.
    body = self.body
    default = None
    if self.template:
        default = self.template.expand_default_value_pattern(length=len(body))
        if default and default == body:
            # 100% match: return an empty tuple list to make eUICC use the default
            return []
    if not default:
        # no template (or no default value in it): we can only remove padding
        default = b'\xff' * len(body)

    sm = SequenceMatcher(a=default, b=body)
    # we can only make use of matches that have the same offset in 'a' and 'b';
    # this also drops the zero-size terminator entry of get_matching_blocks()
    matching_blocks = [x for x in sm.get_matching_blocks() if x.size > 0 and x.a == x.b]
    # NOTE(review): uses self.file_size rather than len(self.body) as total size;
    # presumably both are identical here -- confirm against the callers.
    non_matching_blocks = NonMatch.from_matchlist(matching_blocks, self.file_size)

    ret = []
    # position in the file right after the last content we have emitted
    cur = 0
    for block in non_matching_blocks:
        ret.append(('fillFileOffset', block.a - cur))
        ret.append(('fillFileContent', body[block.a:block.a + block.size]))
        # fillFileOffset is relative, so we must track where the last content ended
        cur = block.a + block.size
    return ret

def __str__(self) -> str:
return "File(%s)" % self.pe_name
diff --git a/tests/unittests/test_esim_saip.py b/tests/unittests/test_esim_saip.py
index e7e324d..edf6d8d 100755
--- a/tests/unittests/test_esim_saip.py
+++ b/tests/unittests/test_esim_saip.py
@@ -90,5 +90,34 @@
self.assertTrue(oid.OID('1.0.1') > oid.OID('1.0'))
self.assertTrue(oid.OID('1.0.2') > oid.OID('1.0.1'))

class NonMatchTest(unittest.TestCase):
    """Checks that NonMatch.from_matchlist() yields the gaps between matching blocks."""

    def _assert_gaps(self, matches, total, gaps):
        """Compare from_matchlist(matches, total) against (offset, length) pairs in 'gaps'."""
        expected = [NonMatch(a=offset, b=offset, size=length) for offset, length in gaps]
        self.assertEqual(NonMatch.from_matchlist(matches, total), expected)

    def test_nonmatch(self):
        # gaps ahead of the first match, between both matches and behind the last one
        matches = [Match(a=10, b=10, size=5), Match(a=20, b=20, size=4)]
        self._assert_gaps(matches, 26, [(0, 10), (15, 5), (24, 2)])

    def test_nonmatch_beg(self):
        # one match covering the start: only the remainder is a gap
        self._assert_gaps([Match(a=0, b=0, size=5)], 20, [(5, 15)])

    def test_nonmatch_end(self):
        # one match covering the end: only the leading part is a gap
        self._assert_gaps([Match(a=19, b=19, size=5)], 24, [(0, 19)])

    def test_nonmatch_none(self):
        # empty match list: the entire range is one gap
        self._assert_gaps([], 24, [(0, 24)])
+
+
+
+
if __name__ == "__main__":
unittest.main()

To view, visit change 38014. To unsubscribe, or for help writing mail filters, visit settings.

Gerrit-MessageType: newchange
Gerrit-Project: pysim
Gerrit-Branch: master
Gerrit-Change-Id: I61e4a5e04beba5c9092979fc546292d5ef3d7aad
Gerrit-Change-Number: 38014
Gerrit-PatchSet: 1
Gerrit-Owner: laforge <laforge@osmocom.org>