上传后在Django中预处理图像

我需要在Django中对上传后的图像进行处理。目前所有上传的文件都保存在documents文件夹中。

Models.py

from django.db import models
import os

class Document(models.Model):
    """An uploaded file, stored beneath a date-based ``documents/`` folder."""

    # The strftime placeholders in ``upload_to`` are expanded when the file is
    # saved.  The path must not contain stray spaces, otherwise each directory
    # level is created with a trailing blank in its name.
    docfile = models.FileField(upload_to='documents/%Y/%m/%d')

处理图像的文件位于remove_noise.py中

remove_noise.py

import tempfile
import cv2
import logging
import numpy as np
from PIL import Image


# Reference width in pixels: get_size_of_scaled_image() divides this by the
# image width to obtain its whole-number scale factor.
IMAGE_SIZE = 1800
# Fixed threshold handed to cv2.threshold() in image_smoothening().
# NOTE(review): 280 is above 255, the largest value an 8-bit pixel can hold;
# if the input is an 8-bit grayscale image no pixel can exceed it, so the
# first threshold pass would yield an all-zero image -- confirm the intended
# value (possibly 180).
BINARY_THREHOLD = 280

# Retained only so that existing references to this module-level name keep
# resolving; get_size_of_scaled_image() does not store its result here.
size = None


def get_size_of_scaled_image(im):
    """Return the ``(width, height)`` that *im* should be resized to.

    The image is enlarged by the largest whole-number factor that keeps its
    width at or below ``IMAGE_SIZE``.  The factor is never less than 1, so an
    image is never shrunk.

    Args:
        im: An object exposing a ``size`` attribute of ``(width, height)``.

    Returns:
        A ``(width, height)`` tuple of the scaled dimensions.
    """
    # Computed afresh on every call: each image has its own dimensions, so a
    # result remembered from one image must not be reused for another.
    length_x, width_y = im.size
    factor = max(1, int(IMAGE_SIZE / length_x))
    return factor * length_x, factor * width_y

def process_image_for_ocr(file_path):
    """Prepare the image at *file_path* for OCR and return the cleaned image.

    The image is first rescaled and written to a temporary JPEG by
    ``set_image_dpi``; that file is then handed to
    ``remove_noise_and_smooth``, whose result is returned.  The temporary
    file is removed afterwards, whether or not the second step succeeded.

    Args:
        file_path: Path of the image to process.

    Returns:
        Whatever ``remove_noise_and_smooth`` returns for the rescaled image.
    """
    import os  # local import: only needed here for temp-file clean-up

    logging.info('Processing image for text Extraction')
    temp_filename = set_image_dpi(file_path)
    try:
        return remove_noise_and_smooth(temp_filename)
    finally:
        # The temporary file is created with delete=False, so nothing else
        # will ever remove it.  Clean-up is best-effort: failing to delete
        # the file must not hide the result or the original exception.
        try:
            os.remove(temp_filename)
        except OSError:
            logging.warning('Could not remove temporary file %s', temp_filename)

def set_image_dpi(file_path):
    """Rescale the image at *file_path* and save it as a 300-DPI temporary JPEG.

    Args:
        file_path: Path (or file object) accepted by ``PIL.Image.open``.

    Returns:
        The path of the newly written temporary ``.jpg`` file.  The file is
        created with ``delete=False``, so the caller is responsible for
        removing it when finished.
    """
    # Close the source image as soon as the resized copy exists.
    with Image.open(file_path) as im:
        size = get_size_of_scaled_image(im)
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
        im_resized = im.resize(size, Image.LANCZOS)

    # JPEG cannot store alpha or palette data (e.g. RGBA / P mode PNG
    # uploads), so convert anything the JPEG writer does not accept.
    if im_resized.mode not in ('1', 'L', 'RGB', 'RGBX', 'CMYK', 'YCbCr'):
        im_resized = im_resized.convert('RGB')

    # Only the unique name is needed; close the handle straight away so no
    # file descriptor leaks and the path can be reopened for writing.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.jpg') as temp_file:
        temp_filename = temp_file.name
    im_resized.save(temp_filename, dpi=(300, 300))
    return temp_filename

def image_smoothening(img):
    """Binarise *img* with a fixed threshold followed by two Otsu passes.

    The image is thresholded at ``BINARY_THREHOLD``, re-thresholded with
    Otsu's method, passed through a Gaussian blur with a 1x1 kernel, and
    thresholded with Otsu's method once more.

    Args:
        img: Image array accepted by ``cv2.threshold``.

    Returns:
        The thresholded image produced by the final Otsu pass.
    """
    otsu_binary = cv2.THRESH_BINARY + cv2.THRESH_OTSU

    # cv2.threshold returns (threshold_used, image); only the image is kept.
    _, fixed = cv2.threshold(img, BINARY_THREHOLD, 255, cv2.THRESH_BINARY)
    _, otsu_first = cv2.threshold(fixed, 0, 255, otsu_binary)
    blurred = cv2.GaussianBlur(otsu_first, (1, 1), 0)
    return cv2.threshold(blurred, 0, 255, otsu_binary)[1]


def remove_noise_and_smooth(file_name):
    """Load *file_name* in grayscale and return a cleaned binary image.

    Two binarisations of the same image are combined with a bitwise OR: an
    adaptive mean threshold followed by morphological opening and closing,
    and the output of ``image_smoothening``.

    Args:
        file_name: Path of the image file to read.

    Returns:
        The combined binary image.

    Raises:
        ValueError: If the file cannot be read as an image.
    """
    logging.info('Removing noise and smoothening image')
    img = cv2.imread(file_name, 0)  # flag 0: read as single-channel grayscale
    if img is None:
        # cv2.imread reports a missing or undecodable file by returning None
        # instead of raising; fail here with a message that names the file.
        raise ValueError('Could not read image file: %r' % (file_name,))

    filtered = cv2.adaptiveThreshold(
        img.astype(np.uint8), 255,
        cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 41, 3,
    )
    # NOTE(review): with a 1x1 structuring element, opening and closing look
    # like they leave the image unchanged -- confirm the intended kernel size.
    kernel = np.ones((1, 1), np.uint8)
    opening = cv2.morphologyEx(filtered, cv2.MORPH_OPEN, kernel)
    closing = cv2.morphologyEx(opening, cv2.MORPH_CLOSE, kernel)

    smoothed = image_smoothening(img)
    return cv2.bitwise_or(smoothed, closing)

Views.py

from django.shortcuts import render

# -*- coding: utf-8 -*-
from django.shortcuts import render_to_response
from django.template import RequestContext
from django.http import HttpResponseRedirect
from django.urls import reverse

from .models import Document
from .forms import DocumentForm
from .remove_noise import process_image_for_ocr, get_size_of_scaled_image

from django.http import HttpResponse
import csv
import ipdb
import tempfile
from Cython.Compiler.Buffer import context

try:
        import Image
except ImportError:
        from PIL import Image
import pytesseract

# Retained only so that existing references to this module-level name keep
# resolving; the view below does not use it.
i = 0


def _append_text_and_export_csv(text, txt_file='data.txt', csv_file='mycsv.csv'):
    """Append *text* to *txt_file*, then rewrite *csv_file* from all of it."""
    with open(txt_file, 'a+') as handle:
        handle.write(text)

    # Context managers guarantee both files are flushed and closed; the csv
    # module expects its files to be opened with newline=''.
    with open(txt_file, 'r', newline='') as src, \
            open(csv_file, 'w', encoding='utf-8', newline='') as dst:
        csv.writer(dst).writerows(csv.reader(src, delimiter=' '))


def list(request):
    """Handle document uploads and render the list of stored documents.

    On a valid POST the uploaded file is saved as a ``Document``, run through
    Tesseract OCR, the recognised text is appended to ``data.txt`` and
    ``mycsv.csv`` is regenerated from that file; the client is then
    redirected back to this view.  Otherwise the upload form and all stored
    documents are rendered with ``list.html``.

    Args:
        request: The incoming ``HttpRequest``.

    Returns:
        A redirect after a successful upload, else the rendered list page.
    """
    if request.method == 'POST':
        form = DocumentForm(request.POST, request.FILES)
        if form.is_valid():
            newdoc = Document(docfile=request.FILES['docfile'])
            newdoc.save()

            # Look the row up by its real primary key.  A process-local
            # counter does not match database ids after a restart, with
            # several worker processes, or when rows already exist.
            saved = Document.objects.get(pk=newdoc.pk)

            # NOTE(review): process_image_for_ocr is imported by this module
            # but not applied here; OCR runs on the image exactly as
            # uploaded -- confirm whether preprocessing was intended.
            text = pytesseract.image_to_string(Image.open(saved.docfile))
            _append_text_and_export_csv(text)

            # Redirect to the document list after POST
            return HttpResponseRedirect(reverse('ocr:list'))
    else:
        form = DocumentForm()  # An empty, unbound form

    # Load documents for the list page
    documents = Document.objects.all()

    # render() takes (request, template_name, context); a fourth positional
    # argument would be interpreted as content_type, so none is passed.
    return render(
        request,
        'list.html',
        {'documents': documents, 'form': form},
    )
链接地址: http://www.djcxy.com/p/56539.html

上一篇: preprocess an Image in Django after upload

下一篇: How to set class name in table cell in python django?