From 6e2ff4e81ccadb02e351480fd9fa837cff8e98b1 Mon Sep 17 00:00:00 2001
From: ARF <none@none>
Date: Thu, 7 Apr 2016 15:03:18 +0200
Subject: [PATCH 1/7] __shapeIndex optimization: removed superfluous tell()

x2.0 speedup over master
---
 shapefile.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/shapefile.py b/shapefile.py
index d23f393f..e950dc73 100644
--- a/shapefile.py
+++ b/shapefile.py
@@ -392,7 +392,7 @@ def __shapeIndex(self, i=None):
             for r in range(numRecords):
                 # Offsets are 16-bit words just like the file length
                 self._offsets.append(unpack(">i", shx.read(4))[0] * 2)
-                shx.seek(shx.tell() + 4)
+                shx.seek(4, 1)
         if not i == None:
             return self._offsets[i]
 

From 6e26248003b365847acf653f0709af6a9c79bbfe Mon Sep 17 00:00:00 2001
From: ARF <none@none>
Date: Thu, 7 Apr 2016 16:13:50 +0200
Subject: [PATCH 2/7] __shapeIndex optimization: remove seek() call

x1.7 speedup over previous commit
---
 shapefile.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/shapefile.py b/shapefile.py
index e950dc73..cf66c8fb 100644
--- a/shapefile.py
+++ b/shapefile.py
@@ -391,8 +391,7 @@ def __shapeIndex(self, i=None):
             shx.seek(100)
             for r in range(numRecords):
                 # Offsets are 16-bit words just like the file length
-                self._offsets.append(unpack(">i", shx.read(4))[0] * 2)
-                shx.seek(4, 1)
+                self._offsets.append(unpack('>i4x', shx.read(8))[0] * 2)
         if not i == None:
             return self._offsets[i]
 

From 8f671db0cded123c223c85ad2baf6d998f4e25a1 Mon Sep 17 00:00:00 2001
From: ARF <none@none>
Date: Thu, 7 Apr 2016 16:21:48 +0200
Subject: [PATCH 3/7] __shapeIndex optimization: use list comprehension

x1.3 speedup over previous commit
---
 shapefile.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/shapefile.py b/shapefile.py
index cf66c8fb..721c9126 100644
--- a/shapefile.py
+++ b/shapefile.py
@@ -389,9 +389,8 @@ def __shapeIndex(self, i=None):
             numRecords = shxRecordLength // 8
             # Jump to the first record.
             shx.seek(100)
-            for r in range(numRecords):
-                # Offsets are 16-bit words just like the file length
-                self._offsets.append(unpack('>i4x', shx.read(8))[0] * 2)
+            # Offsets are 16-bit words just like the file length
+            self._offsets = [unpack('>i4x', shx.read(8))[0] * 2 for r in range(numRecords)]
         if not i == None:
             return self._offsets[i]
 

From efafff8250d944e4e7aced120f5e511e1925064c Mon Sep 17 00:00:00 2001
From: ARF <none@none>
Date: Thu, 7 Apr 2016 16:54:50 +0200
Subject: [PATCH 4/7] __shapeIndex optimization: read into array and discard
 unneeded elements

x1.7 speedup over previous commit
---
 shapefile.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/shapefile.py b/shapefile.py
index 721c9126..010b54a3 100644
--- a/shapefile.py
+++ b/shapefile.py
@@ -390,7 +390,11 @@ def __shapeIndex(self, i=None):
             # Jump to the first record.
             shx.seek(100)
             # Offsets are 16-bit words just like the file length
-            self._offsets = [unpack('>i4x', shx.read(8))[0] * 2 for r in range(numRecords)]
+            self._offsets = [2*el for el in 
+                             array.array('i', unpack(">%si" % (numRecords*2), 
+                                                     shx.read(4 * numRecords*2))
+                                         )[::2]
+                             ]
         if not i == None:
             return self._offsets[i]
 

From 4c0b9b4603851e689287f7708b151436ba89a972 Mon Sep 17 00:00:00 2001
From: ARF <none@none>
Date: Thu, 7 Apr 2016 17:41:10 +0200
Subject: [PATCH 5/7] __shapeIndex optimization: explicit format string
 excluding unneeded elements

x1.5 speedup over previous commit

For reasons not yet understood, this also seems to significantly speed up the
read() call, even though that call itself is essentially untouched.
---
 shapefile.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/shapefile.py b/shapefile.py
index 010b54a3..8f058fb9 100644
--- a/shapefile.py
+++ b/shapefile.py
@@ -389,12 +389,10 @@ def __shapeIndex(self, i=None):
             numRecords = shxRecordLength // 8
             # Jump to the first record.
             shx.seek(100)
+            shxRecords = array.array('i', unpack(">" + "i4x" * numRecords, 
+                                                 shx.read((4+4) * numRecords)))
             # Offsets are 16-bit words just like the file length
-            self._offsets = [2*el for el in 
-                             array.array('i', unpack(">%si" % (numRecords*2), 
-                                                     shx.read(4 * numRecords*2))
-                                         )[::2]
-                             ]
+            self._offsets = [2*el for el in shxRecords]
         if not i == None:
             return self._offsets[i]
 

From eec5efe061166da688262c4742cda3d43dbc5c51 Mon Sep 17 00:00:00 2001
From: ARF <none@none>
Date: Fri, 8 Apr 2016 07:26:31 +0200
Subject: [PATCH 6/7] __shapeIndex optimization: use memoryview slicing to skip
 unused field

x2.0 speedup over previous commit
---
 shapefile.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/shapefile.py b/shapefile.py
index 8f058fb9..cfba5223 100644
--- a/shapefile.py
+++ b/shapefile.py
@@ -18,6 +18,11 @@
 import tempfile
 import itertools
 
+try:
+    memoryview(b'')
+except NameError:
+    memoryview = lambda x: x
+
 #
 # Constants for shape types
 NULL = 0
@@ -389,10 +394,12 @@ def __shapeIndex(self, i=None):
             numRecords = shxRecordLength // 8
             # Jump to the first record.
             shx.seek(100)
-            shxRecords = array.array('i', unpack(">" + "i4x" * numRecords, 
-                                                 shx.read((4+4) * numRecords)))
+            shxRecords = array.array('i')
+            shxRecords.fromfile(shx, 2 * numRecords)
+            if sys.byteorder != 'big':
+                shxRecords.byteswap()
             # Offsets are 16-bit words just like the file length
-            self._offsets = [2*el for el in shxRecords]
+            self._offsets = [2 * el for el in memoryview(shxRecords)[::2]]
         if not i == None:
             return self._offsets[i]
 

From fb25a045b44e70aea9d77572e543c497e990b396 Mon Sep 17 00:00:00 2001
From: ARF <none@none>
Date: Sat, 9 Apr 2016 14:32:31 +0200
Subject: [PATCH 7/7] __shapeIndex optimization: use numpy when available for
 array arithmetic

x378 speedup over master with numpy available
x22 speedup over master without numpy
---
 shapefile.py | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/shapefile.py b/shapefile.py
index cfba5223..bd3be309 100644
--- a/shapefile.py
+++ b/shapefile.py
@@ -18,6 +18,12 @@
 import tempfile
 import itertools
 
+try:
+    import numpy
+    has_numpy = True
+except ImportError:
+    has_numpy = False
+
 try:
     memoryview(b'')
 except NameError:
@@ -227,7 +233,7 @@ def __init__(self, *args, **kwargs):
         self.shx = None
         self.dbf = None
         self.shapeName = "Not specified"
-        self._offsets = []
+        self._offsets = None
         self.shpLength = None
         self.numRecords = None
         self.fields = []
@@ -387,19 +393,22 @@ def __shapeIndex(self, i=None):
         shx = self.shx
         if not shx:
             return None
-        if not self._offsets:
+        if self._offsets is None:
             # File length (16-bit word * 2 = bytes) - header length
             shx.seek(24)
             shxRecordLength = (unpack(">i", shx.read(4))[0] * 2) - 100
             numRecords = shxRecordLength // 8
             # Jump to the first record.
             shx.seek(100)
-            shxRecords = array.array('i')
-            shxRecords.fromfile(shx, 2 * numRecords)
-            if sys.byteorder != 'big':
-                shxRecords.byteswap()
             # Offsets are 16-bit words just like the file length
-            self._offsets = [2 * el for el in memoryview(shxRecords)[::2]]
+            if has_numpy:
+                self._offsets = numpy.fromfile(shx, '>i4', 2 * numRecords)[::2] * 2
+            else:
+                shxRecords = array.array('i')
+                shxRecords.fromfile(shx, 2 * numRecords)
+                if sys.byteorder != 'big':
+                    shxRecords.byteswap()
+                self._offsets = [2 * el for el in memoryview(shxRecords)[::2]]
         if not i == None:
             return self._offsets[i]