Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

#! /usr/bin/env python3 

# -*- coding: utf-8 -*- 

 

#       Copyright 2013, Marten de Vries 

# 

#       This file is part of OpenTeacher. 

# 

#       OpenTeacher is free software: you can redistribute it and/or modify 

#       it under the terms of the GNU General Public License as published by 

#       the Free Software Foundation, either version 3 of the License, or 

#       (at your option) any later version. 

# 

#       OpenTeacher is distributed in the hope that it will be useful, 

#       but WITHOUT ANY WARRANTY; without even the implied warranty of 

#       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 

#       GNU General Public License for more details. 

# 

#       You should have received a copy of the GNU General Public License 

#       along with OpenTeacher.  If not, see <http://www.gnu.org/licenses/>. 

 

import os 

import tempfile 

import subprocess 

import distutils.spawn 

 

class CuneiformOCRModule:
        """Recognizes text in an image with the Cuneiform OCR program.

           Outputs to HOCR.

        """
        def __init__(self, moduleManager, *args, **kwargs):
                """Store the module manager and set the module's metadata.

                Any extra positional or keyword arguments are forwarded to
                the next ``__init__`` in the method resolution order.

                """
                super().__init__(*args, **kwargs)
                self._mm = moduleManager

                # NOTE(review): 'type' and 'priorities' are presumably read
                # by the module manager; their consumers are not in this file.
                self.type = "ocrRecognizer"
                self.priorities = {
                        "default": 666,
                }

        def _callCuneiform(self, *args):
                """Run the ``cuneiform`` executable with the given arguments.

                Both stdout and stderr of the child process are discarded.
                Returns the exit status of the process.

                """
                return subprocess.call(
                        ["cuneiform"] + list(args),
                        stdout=subprocess.DEVNULL,
                        stderr=subprocess.STDOUT,
                )

        def toHocr(self, imagePath):
                """Run Cuneiform on the image at ``imagePath``; return HOCR.

                Cuneiform writes its result to a temporary ``.html`` file,
                which is read back as UTF-8 text and returned as a string.
                The temporary file is always removed, also when running
                Cuneiform or reading its output raises an exception.

                """
                fd, hocrPath = tempfile.mkstemp(".html")
                # Only the path is needed; Cuneiform opens the file itself.
                os.close(fd)
                try:
                        self._callCuneiform("-f", "hocr", "-o", hocrPath, imagePath)
                        with open(hocrPath, 'r', encoding='UTF-8') as f:
                                return f.read()
                finally:
                        # Clean up on every exit path so a failed run does
                        # not leave a stray file in the temp directory.
                        try:
                                os.remove(hocrPath)
                        except FileNotFoundError:
                                # Already gone; don't mask the real error.
                                pass

        def enable(self):
                """Mark the module active if ``cuneiform`` is on the PATH.

                When the executable cannot be found, the ``active``
                attribute is left untouched.

                """
                # Imported here so the block does not rely on the deprecated
                # distutils package (removed in Python 3.12). Unlike
                # distutils.spawn.find_executable, shutil.which only accepts
                # files that are actually executable.
                import shutil

                if not shutil.which("cuneiform"):# pragma: no cover
                        #remain inactive
                        return
                self.active = True

        def disable(self):
                """Mark the module as inactive."""
                self.active = False

 

def init(moduleManager):
        """Build and return this file's module for ``moduleManager``.

        The module manager is passed straight through to the
        CuneiformOCRModule constructor.

        """
        module = CuneiformOCRModule(moduleManager)
        return module