上传后在Django中预处理图像
我需要在 Django 中对上传后的图像进行处理。目前所有上传的文件都保存在 documents 文件夹中。
models.py
from django.db import models
import os
class Document(models.Model):
    """Database record for one uploaded file."""

    # upload_to may contain strftime() placeholders, which Django expands with
    # the upload date. The path segments must not contain stray spaces,
    # otherwise the directories are created with trailing blanks in their
    # names (which some file systems reject or silently strip).
    docfile = models.FileField(upload_to='documents/%Y/%m/%d')
处理图像的文件位于remove_noise.py中
remove_noise.py
import tempfile
import cv2
import logging
import numpy as np
from PIL import Image
# Width (in pixels) that narrow images are scaled up towards.
IMAGE_SIZE = 1800
# Cut-off handed to cv2.threshold by image_smoothening.
# NOTE(review): 280 is above the 8-bit maximum of 255, so for uint8 input no
# pixel can ever exceed it -- confirm the intended value.
BINARY_THREHOLD = 280
# Size returned by the most recent get_size_of_scaled_image() call. Kept as a
# module attribute for backward compatibility; it is no longer used as a cache.
size = None


def get_size_of_scaled_image(im):
    """Return the ``(width, height)`` *im* should be resized to.

    The image is enlarged by the largest whole-number factor that keeps its
    width at or below ``IMAGE_SIZE``; images already wider than that keep
    their size (the factor never drops below 1).

    The result is computed from *im* on every call. Earlier revisions cached
    the first result in the module-level ``size`` and returned it for every
    later image, which resized all subsequent images to the dimensions of the
    first one processed.

    Args:
        im: Any object exposing a ``size`` attribute of ``(width, height)``.

    Returns:
        A ``(width, height)`` tuple of ints.
    """
    global size
    length_x, width_y = im.size
    factor = max(1, IMAGE_SIZE // length_x)
    size = (factor * length_x, factor * width_y)
    return size
def process_image_for_ocr(file_path):
    """Rescale the image at *file_path* and clean it up for OCR.

    The image is first written to a temporary JPEG by ``set_image_dpi`` and
    that file is then passed to ``remove_noise_and_smooth``.

    Args:
        file_path: Path (or file object) accepted by ``set_image_dpi``.

    Returns:
        Whatever ``remove_noise_and_smooth`` returns for the temporary file.
    """
    import os  # local import: only needed for the temp-file cleanup below

    logging.info('Processing image for text Extraction')
    temp_filename = set_image_dpi(file_path)
    try:
        return remove_noise_and_smooth(temp_filename)
    finally:
        # The temporary file is created with delete=False, so it has to be
        # removed here or one file is leaked per processed image.
        try:
            os.remove(temp_filename)
        except OSError:
            logging.warning('Could not remove temporary file %s', temp_filename)
def set_image_dpi(file_path):
    """Resize the image and save it as a 300-DPI temporary JPEG.

    Args:
        file_path: Path or file object that ``PIL.Image.open`` can read.

    Returns:
        The file name of the temporary ``.jpg``. The file is created with
        ``delete=False``; the caller is responsible for removing it.
    """
    # Close the source image as soon as the resized copy exists.
    with Image.open(file_path) as im:
        size = get_size_of_scaled_image(im)
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter
        # under its current name.
        im_resized = im.resize(size, Image.LANCZOS)

    # JPEG cannot store alpha or palette images; saving them raises OSError.
    if im_resized.mode in ('RGBA', 'LA', 'P', 'PA'):
        im_resized = im_resized.convert('RGB')

    # Only the unique name is needed: close the handle before Pillow writes to
    # the path so no descriptor is leaked and the file is not held open twice.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.jpg') as temp_file:
        temp_filename = temp_file.name
    im_resized.save(temp_filename, dpi=(300, 300))
    return temp_filename
def image_smoothening(img):
    """Binarise *img* with a fixed threshold, Otsu, a blur, then Otsu again.

    Args:
        img: Image array accepted by ``cv2.threshold``.

    Returns:
        The thresholded image produced by the final Otsu pass.
    """
    otsu_binary = cv2.THRESH_BINARY + cv2.THRESH_OTSU

    # Each cv2.threshold call returns (threshold_used, image); only the image
    # is needed here.
    fixed = cv2.threshold(img, BINARY_THREHOLD, 255, cv2.THRESH_BINARY)[1]
    first_otsu = cv2.threshold(fixed, 0, 255, otsu_binary)[1]
    blurred = cv2.GaussianBlur(first_otsu, (1, 1), 0)
    _, result = cv2.threshold(blurred, 0, 255, otsu_binary)
    return result
def remove_noise_and_smooth(file_name):
    """Load *file_name* as grayscale and return a cleaned-up binary image.

    An adaptive mean threshold followed by a morphological open and close is
    OR-ed with the output of ``image_smoothening`` on the grayscale image.

    Args:
        file_name: Path of the image file to read with ``cv2.imread``.

    Returns:
        The combined image array.

    Raises:
        ValueError: If OpenCV cannot read or decode *file_name*.
    """
    logging.info('Removing noise and smoothening image')
    img = cv2.imread(file_name, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with a clear message rather than an AttributeError below.
        raise ValueError('Could not read image file: %s' % (file_name,))

    filtered = cv2.adaptiveThreshold(
        img.astype(np.uint8),
        255,
        cv2.ADAPTIVE_THRESH_MEAN_C,
        cv2.THRESH_BINARY,
        41,
        3,
    )
    kernel = np.ones((1, 1), np.uint8)
    opening = cv2.morphologyEx(filtered, cv2.MORPH_OPEN, kernel)
    closing = cv2.morphologyEx(opening, cv2.MORPH_CLOSE, kernel)
    smoothed = image_smoothening(img)
    return cv2.bitwise_or(smoothed, closing)
views.py
from django.shortcuts import render
# -*- coding: utf-8 -*-
from django.shortcuts import render_to_response
from django.template import RequestContext
from django.http import HttpResponseRedirect
from django.urls import reverse
from .models import Document
from .forms import DocumentForm
from .remove_noise import process_image_for_ocr, get_size_of_scaled_image
from django.http import HttpResponse
import csv
import ipdb
import tempfile
from Cython.Compiler.Buffer import context
try:
import Image
except ImportError:
from PIL import Image
import pytesseract
# Retained only so that existing references to this module attribute keep
# working; the view below no longer uses it.
i = 0


def list(request):
    """Handle the upload form and render the page listing all documents.

    On a valid POST the uploaded file is saved as a ``Document`` and run
    through pytesseract. The recognised text is appended to ``data.txt``, and
    ``mycsv.csv`` is rewritten from the accumulated text, split on single
    spaces. The client is then redirected back to this view. A GET request or
    an invalid POST renders ``list.html`` with the form and every stored
    document.
    """
    # Handle file upload
    if request.method == 'POST':
        form = DocumentForm(request.POST, request.FILES)
        if form.is_valid():
            newdoc = Document(docfile=request.FILES['docfile'])
            newdoc.save()

            # Read back the file that was just stored. Looking the row up via
            # a process-local counter (id == number of uploads seen by this
            # process) picks the wrong row, or none at all, after a restart,
            # with pre-existing rows, or with several worker processes.
            # NOTE(review): process_image_for_ocr is imported by this module
            # but is not applied before OCR -- confirm whether it should be.
            stored_name = newdoc.docfile.name
            with newdoc.docfile.storage.open(stored_name, 'rb') as stored:
                text = pytesseract.image_to_string(Image.open(stored))

            txt_file = r"data.txt"
            csv_file = r'mycsv.csv'

            # Context managers make sure the text is flushed before it is
            # read again and that no handle is leaked per request.
            with open(txt_file, 'a+', encoding='utf-8') as handle:
                handle.write(text)

            # The csv module expects files opened with newline=''.
            with open(txt_file, 'r', encoding='utf-8', newline='') as txt_in, \
                    open(csv_file, 'w', encoding='utf-8', newline='') as csv_out:
                in_txt = csv.reader(txt_in, delimiter=' ')
                csv.writer(csv_out).writerows(in_txt)

            # Redirect to the document list after POST
            return HttpResponseRedirect(reverse('ocr:list'))
    else:
        form = DocumentForm()  # An empty, unbound form

    # Load documents for the list page
    documents = Document.objects.all()

    # Render list page with the documents and the form. The fourth positional
    # argument of render() is content_type, so the unrelated ``context``
    # object imported from Cython must not be passed here.
    return render(
        request,
        'list.html',
        {'documents': documents, 'form': form},
    )
链接地址: http://www.djcxy.com/p/56539.html