教你怎么用python删除相似度高的图片
短信预约 信息系统项目管理师 报名、考试、查分时间动态提醒
1. 前言
因为输入是视频,切完帧之后得到的都是连续图片,所以我的目录结构如下:
frame_output/
├── 1/  (第一段视频切帧后的图片:3.jpg、6.jpg、9.jpg ……)
└── 2/  (第二段视频切帧后的图片)
其中frame_output是视频切帧后的保存路径,1和2两个文件夹分别存放两段视频切帧后的图片。
2. 切帧代码如下:
#encoding:utf-8
import os
import sys
import cv2
# Frame-extraction script: for every entry in `video_path`, decode it with
# OpenCV and write every third frame as a JPEG into its own numbered folder
# under `frame_output`.

video_path = '/home/pythonfile/video/'  # absolute path; the article's example keeps two videos here
# frame_output sits next to this script and receives the extracted frames.
out_frame_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'frame_output')
# exist_ok avoids the check-then-create race of `if not exists: makedirs`.
os.makedirs(out_frame_path, exist_ok=True)
print('out_frame_path', out_frame_path)

list1 = os.listdir(video_path)
print('list', list1)
# Full path of every entry found in the video directory.
files = [os.path.join(video_path, name) for name in list1]
print('files', files)

for k, file in enumerate(files):
    # Folders are numbered from 1 in the order the entries were listed.
    frame_dir = os.path.join(out_frame_path, '%d' % (k + 1))
    os.makedirs(frame_dir, exist_ok=True)
    cap = cv2.VideoCapture(file)
    j = 0  # 1-based index of the frame most recently requested
    print('start prossing NO.%d video' % (k + 1))
    try:
        while True:
            ret, frame = cap.read()
            j += 1
            if not ret:
                # No more frames could be read from this capture.
                break
            # Keep one frame out of every three; the file is named after the frame index.
            if j % 3 == 0:
                cv2.imwrite(os.path.join(frame_dir, '%d.jpg' % j), frame)
    finally:
        # Release the capture even if writing a frame raised.
        cap.release()
    print('prossed NO.%d video' % (k + 1))
3. 删除相似度高的图片
# coding: utf-8
import os
import cv2
# from skimage.measure import compare_ssim
# from skimage.metrics import _structural_similarity
from skimage.metrics import structural_similarity as ssim
def delete(filename1):
    """Remove the file at ``filename1`` from disk.

    This is a thin wrapper around ``os.remove``; it adds no error handling,
    so whatever ``os.remove`` raises propagates to the caller.

    Parameters
    ----------
    filename1 : str
        Path of the file to remove.
    """
    os.remove(filename1)
def list_all_files(root):
    """Collect the entries under ``root``, mirroring the directory nesting.

    Regular files directly inside ``root`` are returned as path strings
    (``root`` joined with the entry name).  Each sub-directory contributes ONE
    nested list, produced by calling this function recursively on it, so the
    result is a tree of lists rather than a flat list.  Entries that are
    neither a directory nor a regular file are skipped.

    Entries are visited in sorted name order.  ``os.listdir`` itself returns
    names in arbitrary order, so sorting is what makes the result
    reproducible between runs.

    Parameters
    ----------
    root : str
        Directory to scan.

    Returns
    -------
    list
        File paths (``str``) and nested lists (one per sub-directory).
    """
    files = []
    for name in sorted(os.listdir(root)):
        # os.listdir yields bare names; isdir/isfile need the joined path.
        element = os.path.join(root, name)
        if os.path.isdir(element):
            # Keep each sub-directory's content grouped in its own list.
            files.append(list_all_files(element))
        elif os.path.isfile(element):
            files.append(element)
    return files
def ssim_compare(img_files, threshold=0.9):
    """Flag every image that is nearly identical to the one before it.

    Walks ``img_files`` in order and compares each image with its successor
    using ``ssim``.  When the score is greater than ``threshold`` the
    successor's path is appended to the module-level ``imgs_n`` list.  Nothing
    is deleted here; this function only records the paths.

    Lists with fewer than two entries contain no pair to compare and yield 0.

    Parameters
    ----------
    img_files : list of str
        Image paths, already in the order in which they should be compared.
    threshold : float, optional
        SSIM score above which the second image of a pair is flagged.
        Defaults to 0.9.

    Returns
    -------
    int
        Number of paths appended to ``imgs_n`` by this call.
    """
    count = 0
    # Pairing each path with its successor stops at the last valid pair, so
    # no index ever runs past the end of the list.
    for current, following in zip(img_files, img_files[1:]):
        if not os.path.exists(current):
            print('not exist', current)
            break
        img = cv2.imread(current)
        img1 = cv2.imread(following)
        # cv2.imread reports an unreadable file by returning None instead of
        # raising, so guard before handing the arrays to ssim.
        if img is None or img1 is None:
            print('unreadable image, skipped:', current, following)
            continue
        # Structural-similarity score of the two colour images.
        # NOTE(review): newer scikit-image releases replace `multichannel`
        # with `channel_axis` -- confirm against the installed version.
        ssim_value = ssim(img, img1, multichannel=True)
        if ssim_value > threshold:
            count += 1
            imgs_n.append(following)
            print('big_ssim:', current, following, ssim_value)
    return count
if __name__ == '__main__':
    # Root folder holding one sub-folder of extracted frames per video.
    path = '/home/dj/pythonfile/frame_output/'
    img_path = path
    # Filled by ssim_compare() with the paths of images to be removed.
    imgs_n = []
    # One nested list of file paths per sub-folder of `path`.
    all_files = list_all_files(path)
    print('1', len(all_files))
    for files in all_files:
        if not isinstance(files, list):
            # A plain file sitting directly in `path`: not a frame folder.
            continue
        # Keep only the .jpg frames BEFORE sorting, so that other files (or
        # nested sub-folder lists) cannot break the numeric sort key.
        img_files = [img for img in files
                     if isinstance(img, str) and img.endswith('.jpg')]
        if not img_files:
            continue
        # Order by frame number, i.e. the file name without its extension.
        img_files.sort(key=lambda x: int(os.path.splitext(os.path.basename(x))[0]))
        count = ssim_compare(img_files)
        print(os.path.dirname(img_files[0]), "路径下删除的图片数量为:", count)
    # Delete only after every folder has been scanned; imgs_n accumulates
    # across folders, so each flagged path is removed exactly once.
    for image in imgs_n:
        delete(image)
4. 导入skimage.measure import compare_ssim出错的解决方法:
将
from skimage.measure import compare_ssim
改为
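from skimage.metrics import structural_similarity as ssim
(自scikit-image 0.16起,compare_ssim已更名为skimage.metrics.structural_similarity;调用时把compare_ssim(...)换成ssim(...)即可,不需要导入带下划线的私有模块_structural_similarity。)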
5. structural_similarity.py的源码
from warnings import warn
import numpy as np
from scipy.ndimage import uniform_filter, gaussian_filter
from ..util.dtype import dtype_range
from ..util.arraycrop import crop
from .._shared.utils import warn, check_shape_equality
__all__ = ['structural_similarity']
def structural_similarity(im1, im2,
                          *,
                          win_size=None, gradient=False, data_range=None,
                          multichannel=False, gaussian_weights=False,
                          full=False, **kwargs):
    """
    Compute the mean structural similarity index between two images.

    Parameters
    ----------
    im1, im2 : ndarray
        Images. Any dimensionality with same shape.
    win_size : int or None, optional
        The side-length of the sliding window used in comparison. Must be an
        odd value. If `gaussian_weights` is True, this is ignored and the
        window size will depend on `sigma`.
    gradient : bool, optional
        If True, also return the gradient with respect to im2.
    data_range : float, optional
        The data range of the input image (distance between minimum and
        maximum possible values). By default, this is estimated from the image
        data-type.
    multichannel : bool, optional
        If True, treat the last dimension of the array as channels. Similarity
        calculations are done independently for each channel then averaged.
    gaussian_weights : bool, optional
        If True, each patch has its mean and variance spatially weighted by a
        normalized Gaussian kernel of width sigma=1.5.
    full : bool, optional
        If True, also return the full structural similarity image.

    Other Parameters
    ----------------
    use_sample_covariance : bool
        If True, normalize covariances by N-1 rather than N, where N is the
        number of pixels within the sliding window.
    K1 : float
        Algorithm parameter, K1 (small constant, see [1]_).
    K2 : float
        Algorithm parameter, K2 (small constant, see [1]_).
    sigma : float
        Standard deviation for the Gaussian when `gaussian_weights` is True.

    Returns
    -------
    mssim : float
        The mean structural similarity index over the image.
    grad : ndarray
        The gradient of the structural similarity between im1 and im2 [2]_.
        This is only returned if `gradient` is set to True.
    S : ndarray
        The full SSIM image.  This is only returned if `full` is set to True.

    Raises
    ------
    ValueError
        If K1, K2 or sigma is negative, if `win_size` is larger than any
        axis of `im1`, or if `win_size` is even.

    Notes
    -----
    To match the implementation of Wang et. al. [1]_, set `gaussian_weights`
    to True, `sigma` to 1.5, and `use_sample_covariance` to False.

    .. versionchanged:: 0.16
        This function was renamed from ``skimage.measure.compare_ssim`` to
        ``skimage.metrics.structural_similarity``.

    References
    ----------
    .. [1] Wang, Z., Bovik, A. C., Sheikh, H. R., & Simoncelli, E. P.
       (2004). Image quality assessment: From error visibility to
       structural similarity. IEEE Transactions on Image Processing,
       13, 600-612.
       https://ece.uwaterloo.ca/~z70wang/publications/ssim.pdf,
       :DOI:`10.1109/TIP.2003.819861`

    .. [2] Avanaki, A. N. (2009). Exact global histogram specification
       optimized for structural similarity. Optical Review, 16, 613-621.
       :arxiv:`0901.0065`
       :DOI:`10.1007/s10043-009-0119-z`
    """
    check_shape_equality(im1, im2)

    if multichannel:
        # loop over channels: each slice along the last axis is scored by a
        # recursive call with multichannel=False, then the scores are averaged
        args = dict(win_size=win_size,
                    gradient=gradient,
                    data_range=data_range,
                    multichannel=False,
                    gaussian_weights=gaussian_weights,
                    full=full)
        args.update(kwargs)
        nch = im1.shape[-1]
        mssim = np.empty(nch)
        # G / S collect the per-channel gradient and full SSIM maps; they are
        # allocated only when the corresponding output was requested
        if gradient:
            G = np.empty(im1.shape)
        if full:
            S = np.empty(im1.shape)
        for ch in range(nch):
            ch_result = structural_similarity(im1[..., ch],
                                              im2[..., ch], **args)
            # the shape of ch_result depends on the gradient/full flags
            if gradient and full:
                mssim[..., ch], G[..., ch], S[..., ch] = ch_result
            elif gradient:
                mssim[..., ch], G[..., ch] = ch_result
            elif full:
                mssim[..., ch], S[..., ch] = ch_result
            else:
                mssim[..., ch] = ch_result
        mssim = mssim.mean()
        if gradient and full:
            return mssim, G, S
        elif gradient:
            return mssim, G
        elif full:
            return mssim, S
        else:
            return mssim

    # optional tuning values come from **kwargs, with these defaults
    K1 = kwargs.pop('K1', 0.01)
    K2 = kwargs.pop('K2', 0.03)
    sigma = kwargs.pop('sigma', 1.5)
    # NOTE: only negative values are rejected here; zero passes these checks
    # despite the "must be positive" wording of the messages
    if K1 < 0:
        raise ValueError("K1 must be positive")
    if K2 < 0:
        raise ValueError("K2 must be positive")
    if sigma < 0:
        raise ValueError("sigma must be positive")
    use_sample_covariance = kwargs.pop('use_sample_covariance', True)

    if gaussian_weights:
        # Set to give an 11-tap filter with the default sigma of 1.5 to match
        # Wang et. al. 2004.
        truncate = 3.5

    if win_size is None:
        if gaussian_weights:
            # set win_size used by crop to match the filter size
            r = int(truncate * sigma + 0.5)  # radius as in ndimage
            win_size = 2 * r + 1
        else:
            win_size = 7   # backwards compatibility

    # every axis of im1 must be at least win_size long
    if np.any((np.asarray(im1.shape) - win_size) < 0):
        raise ValueError(
            "win_size exceeds image extent. If the input is a multichannel "
            "(color) image, set multichannel=True.")

    if not (win_size % 2 == 1):
        raise ValueError('Window size must be odd.')

    if data_range is None:
        # no explicit range given: use the (min, max) pair that dtype_range
        # lists for im1's dtype
        if im1.dtype != im2.dtype:
            warn("Inputs have mismatched dtype. Setting data_range based on "
                 "im1.dtype.", stacklevel=2)
        dmin, dmax = dtype_range[im1.dtype.type]
        data_range = dmax - dmin

    ndim = im1.ndim

    # choose the local-averaging filter: Gaussian weights or a plain box
    if gaussian_weights:
        filter_func = gaussian_filter
        filter_args = {'sigma': sigma, 'truncate': truncate}
    else:
        filter_func = uniform_filter
        filter_args = {'size': win_size}

    # ndimage filters need floating point data
    im1 = im1.astype(np.float64)
    im2 = im2.astype(np.float64)

    # number of samples inside one win_size-sided window over all ndim axes
    NP = win_size ** ndim

    # filter has already normalized by NP
    if use_sample_covariance:
        cov_norm = NP / (NP - 1)  # sample covariance
    else:
        cov_norm = 1.0  # population covariance to match Wang et. al. 2004

    # compute (weighted) means
    ux = filter_func(im1, **filter_args)
    uy = filter_func(im2, **filter_args)

    # compute (weighted) variances and covariances
    uxx = filter_func(im1 * im1, **filter_args)
    uyy = filter_func(im2 * im2, **filter_args)
    uxy = filter_func(im1 * im2, **filter_args)
    vx = cov_norm * (uxx - ux * ux)
    vy = cov_norm * (uyy - uy * uy)
    vxy = cov_norm * (uxy - ux * uy)

    # C1 / C2 are the constants added to numerator and denominator terms,
    # scaled by the data range
    R = data_range
    C1 = (K1 * R) ** 2
    C2 = (K2 * R) ** 2

    # S = (A1 * A2) / (B1 * B2) is the per-element SSIM map
    A1, A2, B1, B2 = ((2 * ux * uy + C1,
                       2 * vxy + C2,
                       ux ** 2 + uy ** 2 + C1,
                       vx + vy + C2))
    D = B1 * B2
    S = (A1 * A2) / D

    # to avoid edge effects will ignore filter radius strip around edges
    pad = (win_size - 1) // 2

    # compute (weighted) mean of ssim
    mssim = crop(S, pad).mean()

    if gradient:
        # The following is Eqs. 7-8 of Avanaki 2009.
        grad = filter_func(A1 / D, **filter_args) * im1
        grad += filter_func(-S / B2, **filter_args) * im2
        grad += filter_func((ux * (A2 - A1) - uy * (B2 - B1) * S) / D,
                            **filter_args)
        grad *= (2 / im1.size)

        if full:
            return mssim, grad, S
        else:
            return mssim, grad
    else:
        if full:
            return mssim, S
        else:
            return mssim
到此,这篇关于教你怎么用python删除相似度高的图片的文章就介绍到这了。更多相关python删除相似度高的图片的内容,请搜索编程网以前的文章或继续浏览下面的相关文章,希望大家以后多多支持编程网!
免责声明:
① 本站未注明“稿件来源”的信息均来自网络整理。其文字、图片和音视频稿件的所属权归原作者所有。本站收集整理出于非商业性的教育和科研之目的,并不意味着本站赞同其观点或证实其内容的真实性。仅作为临时的测试数据,供内部测试之用。本站并未授权任何人以任何方式主动获取本站任何信息。
② 本站未注明“稿件来源”的临时测试数据将在测试完成后最终做删除处理。有问题或投稿请发送至: 邮箱/279061341@qq.com QQ/279061341