ocrodjvu (0.4.6-3+squeeze1) cuneiform-temp-files.diff

Summary

 lib/cuneiform.py |   27 +++++++++++++++++++--------
 1 file changed, 19 insertions(+), 8 deletions(-)

    
download this patch

Patch contents

Description: Fix insecure use of temporary files by running Cuneiform in a private directory created with tempfile.mkdtemp().
Origin: backport, http://bitbucket.org/jwilk/ocrodjvu/changeset/dedea346339f
Bug-Debian: http://bugs.debian.org/598134
Last-Update: 2010-09-26

--- a/lib/cuneiform.py
+++ b/lib/cuneiform.py
@@ -11,8 +11,12 @@
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 # General Public License for more details.
 
+from __future__ import with_statement
+
 import contextlib
 import re
+import os
+import shutil
 import tempfile
 from cStringIO import StringIO
 
@@ -67,17 +71,24 @@
     return language in get_languages()
 
 def recognize(pbm_file, language):
-    hocr_file = tempfile.NamedTemporaryFile(prefix='ocrodjvu.', suffix='.html')
-    worker = ipc.Subprocess(
-        ['cuneiform', '-l', iso_to_cuneiform(language), '-f', 'hocr', '-o', hocr_file.name, pbm_file.name],
-        stdout=ipc.PIPE,
-        env={},  # locale=POSIX
-    )
-    worker.wait()
+    hocr_directory = tempfile.mkdtemp(prefix='ocrodjvu.')
+    # A separate non-world-writable directory is needed, as Cuneiform
+    # can create additional files, e.g. images.
+    try:
+        hocr_file_name = os.path.join(hocr_directory, 'ocr.html')
+        worker = ipc.Subprocess(
+            ['cuneiform', '-l', iso_to_cuneiform(language), '-f', 'hocr', '-o', hocr_file_name, pbm_file.name],
+            stdout=ipc.PIPE,
+            env={},  # locale=POSIX
+        )
+        worker.wait()
+        with open(hocr_file_name, 'r') as hocr_file:
+            contents = hocr_file.read()
+    finally:
+        shutil.rmtree(hocr_directory)
     # Sometimes Cuneiform returns files with broken encoding or with control
     # characters: https://bugs.launchpad.net/cuneiform-linux/+bug/585418
     # Let's fix it.
-    contents = hocr_file.read()
     contents = utils.sanitize_utf8(contents)
     return contextlib.closing(StringIO(contents))