0%

Python根据excel表格编号匹配csv的url

  • export_urls.csv是阿里云oss导出的url表,其中有四个列表字段,依次是:id,barcode,type,serial,img_url

  • 2.xlsx 是所依据的excel表格,固定的第一列是barcode。

  • data.xlsx是生成的结果数据。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# coding: utf-8
import re
import pandas as pd
from openpyxl import Workbook

rz = re.compile(r'/(\d+-\d)-.+jpg')

d = {}

dkey = {}


def open_csv():
for file in file_paths:
ex = pd.read_csv(file)
exs = ex.values.tolist()
for i in exs:
url = i[4]
url_id = rz.findall(url)
print(url_id)
if len(url_id)== 0 :
continue
url_id = url_id[0]
if url_id not in d:
d.update({url_id: [url]})
else:
d[url_id].append(url)

for k, v in d.items():
ks = str(k).split('-')
k1 = ks[0]
k2 = ks[1]
if k1 not in dkey:
if str(k2) == '1':
dkey.update({k1: {'0': [], '1': v}})
else:
dkey.update({k1: {'0': v, '1': []}})
else:
if str(k2) == '1':
dkey[k1]['1'].extend(v)
else:
dkey[k1]['0'].extend(v)

for k, v in dkey.items():
print(type(k), v)
wb.append([str(k), str(v['0']), str(v['1'])])

for i in orl_key:
if str(i) not in dkey:
print(type(i), 'iii')
wb.append([str(i)])


if __name__ == "__main__":
orl_urlids = pd.read_excel(r'2.xlsx')
orl_urls = orl_urlids.values.tolist()
orl_key = [i[0] for i in orl_urls]

wk = Workbook()
wb = wk.active

file_paths = ['./export_urls.csv']
open_csv()
wk.save(r'data.xlsx')
# """