laforge has uploaded this change for review.

View Change

pySim.esim.saip: Implement optimized file content encoding

Make use of the fill pattern when encoding file contents:
Only encode the differences from the fill pattern of the file, in order
to reduce the profile download size.

Change-Id: I61e4a5e04beba5c9092979fc546292d5ef3d7aad
---
M pySim/esim/saip/__init__.py
M tests/unittests/test_esim_saip.py
2 files changed, 81 insertions(+), 6 deletions(-)

git pull ssh://gerrit.osmocom.org:29418/pysim refs/changes/14/38014/1

diff --git a/pySim/esim/saip/__init__.py b/pySim/esim/saip/__init__.py
index 658810a..505ea30 100644
--- a/pySim/esim/saip/__init__.py
+++ b/pySim/esim/saip/__init__.py
@@ -20,6 +20,8 @@
 import io
 from typing import Tuple, List, Optional, Dict, Union
 from collections import OrderedDict
+from difflib import SequenceMatcher, Match
+
 import asn1tools
 from osmocom.utils import b2h, h2b, Hexstr
 from osmocom.tlv import BER_TLV_IE, bertlv_parse_tag, bertlv_parse_len
@@ -40,6 +42,29 @@
 
 logger = logging.getLogger(__name__)
 
+class NonMatch(Match):
+    """A contiguous non-matching block of data; the opposite (inverse) of difflib.Match."""
+    @classmethod
+    def from_matchlist(cls, l: List[Match], size:int) -> List['NonMatch']:
+        """Build the list of non-matching blocks, i.e. the gaps before, between and after the
+        matching blocks in 'l', for data of total length 'size'.  'l' must be ordered and
+        non-overlapping.  Raises ValueError if a match has different offsets in a and b."""
+        res = []
+        cur = 0     # offset of the first byte not yet covered by a match or a gap
+        for match in l:
+            if match.a != match.b:
+                raise ValueError('only works for equal-offset matches')
+            assert match.a >= cur   # input must be ordered and non-overlapping
+            nm_len = match.a - cur
+            if nm_len > 0:
+                # there's no point in generating zero-length non-matching sections
+                res.append(cls(a=cur, b=cur, size=nm_len))
+            cur = match.a + match.size
+        if size > cur:
+            # trailing gap between the end of the last match and the end of the data
+            res.append(cls(a=cur, b=cur, size=size-cur))
+        return res
+
 class Naa:
     """A class defining a Network Access Application (NAA)."""
     name = None
@@ -359,12 +384,33 @@
                 return ValueError("Unknown key '%s' in tuple list" % k)
         return stream.getvalue()
 
-    def file_content_to_tuples(self) -> List[Tuple]:
-        # FIXME: simplistic approach. needs optimization. We should first check if the content
-        # matches the expanded default value from the template. If it does, return empty list.
-        # Next, we should compute the diff between the default value and self.body, and encode
-        # that as a sequence of fillFileOffset and fillFileContent tuples.
-        return [('fillFileContent', self.body)]
+    def file_content_to_tuples(self, optimize:bool = True) -> List[Tuple]:
+        """Encode self.body as a list of ('fillFileOffset', int) / ('fillFileContent', bytes) tuples.
+        With optimize, only parts differing from the template default (or from 0xff padding) are
+        encoded; each offset is relative to the end of the previously encoded content."""
+        if not optimize:
+            # simplistic approach: encode the full file, ignoring the template/default
+            return [('fillFileContent', self.body)]
+        default = None
+        if self.template:
+            default = self.template.expand_default_value_pattern(length=len(self.body))
+        if not default:
+            # no template (or no default value in it): we can only remove 0xff padding
+            default = b'\xff'*len(self.body)
+        elif default == self.body:
+            # 100% match: return an empty tuple list to make eUICC use the default
+            return []
+        matching_blocks = SequenceMatcher(a=default, b=self.body).get_matching_blocks()
+        # we can only make use of matches that have the same offset in 'a' and 'b'
+        matching_blocks = [x for x in matching_blocks if x.size > 0 and x.a == x.b]
+        non_matching_blocks = NonMatch.from_matchlist(matching_blocks, self.file_size)
+        ret = []
+        cur = 0     # position right after the content encoded so far
+        for block in non_matching_blocks:
+            ret.append(('fillFileOffset', block.a - cur))
+            ret.append(('fillFileContent', self.body[block.a:block.a+block.size]))
+            cur = block.a + block.size
+        return ret
 
     def __str__(self) -> str:
         return "File(%s)" % self.pe_name
diff --git a/tests/unittests/test_esim_saip.py b/tests/unittests/test_esim_saip.py
index e7e324d..edf6d8d 100755
--- a/tests/unittests/test_esim_saip.py
+++ b/tests/unittests/test_esim_saip.py
@@ -90,5 +90,34 @@
         self.assertTrue(oid.OID('1.0.1') > oid.OID('1.0'))
         self.assertTrue(oid.OID('1.0.2') > oid.OID('1.0.1'))
 
+class NonMatchTest(unittest.TestCase):
+    """Check that NonMatch.from_matchlist() returns the gaps (non-matching blocks) left over
+    by a list of equal-offset matches within data of a given total size."""
+    def test_nonmatch(self):
+        # gaps before the first match, in between the two matches and after the last match
+        match_list = [Match(a=10, b=10, size=5), Match(a=20, b=20, size=4)]
+        nm_list = NonMatch.from_matchlist(match_list, 26)
+        self.assertEqual(nm_list, [NonMatch(a=0, b=0, size=10), NonMatch(a=15, b=15, size=5),
+                                   NonMatch(a=24, b=24, size=2)])
+
+    def test_nonmatch_beg(self):
+        # single match at the very beginning: only the trailing gap is reported
+        match_list = [Match(a=0, b=0, size=5)]
+        nm_list = NonMatch.from_matchlist(match_list, 20)
+        self.assertEqual(nm_list, [NonMatch(a=5, b=5, size=15)])
+
+    def test_nonmatch_end(self):
+        # single match reaching the end of the data (19 + 5 == 24): only the leading gap is reported
+        match_list = [Match(a=19, b=19, size=5)]
+        nm_list = NonMatch.from_matchlist(match_list, 24)
+        self.assertEqual(nm_list, [NonMatch(a=0, b=0, size=19)])
+
+    def test_nonmatch_none(self):
+        # no match at all: the entire data is reported as one single gap
+        match_list = []
+        nm_list = NonMatch.from_matchlist(match_list, 24)
+        self.assertEqual(nm_list, [NonMatch(a=0, b=0, size=24)])
+
+
 if __name__ == "__main__":
 	unittest.main()

To view, visit change 38014. To unsubscribe, or for help writing mail filters, visit settings.