(我已更新此帖子,以更准确地描述问题,并补充了最初遗漏的信息。)在 Python 中使用正则表达式,从包含大括号和方括号的复杂 HTML 中提取子字符串
我为获取所需字符串所做的所有尝试都会导致 AttributeError: 'NoneType' object has no attribute 'group'。
这是我的代码:
image = re.search("photo: /\[[^\]]+\]/", text)
image = image.group(1)
我还在努力学习正则表达式,但这个问题已经困扰我太久了。
我想抓取包含照片链接的那部分 JSON,也就是每个条目中位于 "uploadTime" 之前的 "id" 字段(不包括 "uploadTime" 本身):
下面是相关的那段 JSON:
photo: [{
"id": "http:\/\/img.ksl.com\/mx\/mplace-classifieds.ksl.com\/1159545-1498418383-59832.jpg",
"uploadTime": {
"sec": 1498418386,
"usec": 192000
},
"extension": "jpg",
"md5": "6fac68fbcbdb31d17af7be277ab673be",
"height": 600,
"width": 800,
"description": "",
"originalFilePath": "",
"originalFileName": "photo_0D993ADA-8AFC-4A79-8F9B-18E6F6C30B94.jpg"
}, {
"id": "http:\/\/img.ksl.com\/mx\/mplace-classifieds.ksl.com\/1159545-1498418389-472609.jpg",
"uploadTime": {
"sec": 1498418392,
"usec": 118000
},
"extension": "jpg",
"md5": "6470e562d650099a1cafe9281f951c21",
"height": 600,
"width": 800,
"description": "",
"originalFilePath": "",
"originalFileName": "photo_335B7BC0-F6DE-4E19-8489-3AA7B3920144.jpg"
}, {
"id": "http:\/\/img.ksl.com\/mx\/mplace-classifieds.ksl.com\/1159545-1498418397-06491.jpg",
"uploadTime": {
"sec": 1498418400,
"usec": 161000
},
"extension": "jpg",
"md5": "5f2df3edfed164c062e739c0c3258970",
"height": 600,
"width": 800,
"description": "",
"originalFilePath": "",
"originalFileName": "photo_9C57A971-9748-4DBD-919D-8D532C8D7C1A.jpg"
}, {
"id": "http:\/\/img.ksl.com\/mx\/mplace-classifieds.ksl.com\/1159545-1498418403-391642.jpg",
"uploadTime": {
"sec": 1498418406,
"usec": 936000
},
"extension": "jpg",
"md5": "098dfa4d40e33c6897f62edc471670dd",
"height": 600,
"width": 800,
"description": "",
"originalFilePath": "",
"originalFileName": "photo_A55BD209-3BFB-447E-AE59-40CF656664A8.jpg"
}, {
"id": "http:\/\/img.ksl.com\/mx\/mplace-classifieds.ksl.com\/1159545-1498418409-263588.jpg",
"uploadTime": {
"sec": 1498418412,
"usec": 789000
},
"extension": "jpg",
"md5": "50b69c1db486f4bb6af723f7395a360b",
"height": 600,
"width": 800,
"description": "",
"originalFilePath": "",
"originalFileName": "photo_8BCDC2F0-8CBA-442C-98F5-0389455C8014.jpg"
}, {
"id": "http:\/\/img.ksl.com\/mx\/mplace-classifieds.ksl.com\/1159545-1498418415-54882.jpg",
"uploadTime": {
"sec": 1498418418,
"usec": 462000
},
"extension": "jpg",
"md5": "34296cda28b212a6c5590f233a2dca09",
"height": 600,
"width": 800,
"description": "",
"originalFilePath": "",
"originalFileName": "photo_726D1636-E3A9-4515-9B95-55161FAAF730.jpg"
}, {
"id": "http:\/\/img.ksl.com\/mx\/mplace-classifieds.ksl.com\/1159545-1498418421-389128.jpg",
"uploadTime": {
"sec": 1498418424,
"usec": 518000
},
"extension": "jpg",
"md5": "265087f19c17a99561a817f02a097b21",
"height": 600,
"width": 800,
"description": "",
"originalFilePath": "",
"originalFileName": "photo_09B01A71-46F2-4D8F-9153-CE0F0017495A.jpg"
}]
这段 JSON 是一个更大字符串的一部分:
<script type="text/javascript">
var listingData = {};
var userData = {};
window.detailPage = window.detailPage || {};
window.detailPage.listingData = {
id: 44782446,
status: "Active",
createTime: 1498418380,
displayTime: 1500694902,
expireTime: 1503286902,
title: "Yamaha RX-V461",
description: "Great Audio\/Video 5.1 surround receiver. Great condition ",
city: "South Jordan",
state: "UT",
zip: 84095,
contactName: "Robert",
contactHomePhone: "801-635-6040",
contactCellPhone: "801-635-6040",
contactEmail: "hasEmail",
lat: 40.5693,
lon: -111.9672,
latLon: "40.5693,-111.9672",
price: 50,
category: "Electronics",
subCategory: "Home Audio Receivers",
marketType: "Sale",
sellerType: "Private",
photo: [{"id":"http:\/\/img.ksl.com\/mx\/mplace-classifieds.ksl.com\/1159545-1498418383-59832.jpg","uploadTime":{"sec":1498418386,"usec":192000},"extension":"jpg","md5":"6fac68fbcbdb31d17af7be277ab673be","height":600,"width":800,"description":"","originalFilePath":"","originalFileName":"photo_0D993ADA-8AFC-4A79-8F9B-18E6F6C30B94.jpg"},{"id":"http:\/\/img.ksl.com\/mx\/mplace-classifieds.ksl.com\/1159545-1498418389-472609.jpg","uploadTime":{"sec":1498418392,"usec":118000},"extension":"jpg","md5":"6470e562d650099a1cafe9281f951c21","height":600,"width":800,"description":"","originalFilePath":"","originalFileName":"photo_335B7BC0-F6DE-4E19-8489-3AA7B3920144.jpg"},{"id":"http:\/\/img.ksl.com\/mx\/mplace-classifieds.ksl.com\/1159545-1498418397-06491.jpg","uploadTime":{"sec":1498418400,"usec":161000},"extension":"jpg","md5":"5f2df3edfed164c062e739c0c3258970","height":600,"width":800,"description":"","originalFilePath":"","originalFileName":"photo_9C57A971-9748-4DBD-919D-8D532C8D7C1A.jpg"},{"id":"http:\/\/img.ksl.com\/mx\/mplace-classifieds.ksl.com\/1159545-1498418403-391642.jpg","uploadTime":{"sec":1498418406,"usec":936000},"extension":"jpg","md5":"098dfa4d40e33c6897f62edc471670dd","height":600,"width":800,"description":"","originalFilePath":"","originalFileName":"photo_A55BD209-3BFB-447E-AE59-40CF656664A8.jpg"},{"id":"http:\/\/img.ksl.com\/mx\/mplace-classifieds.ksl.com\/1159545-1498418409-263588.jpg","uploadTime":{"sec":1498418412,"usec":789000},"extension":"jpg","md5":"50b69c1db486f4bb6af723f7395a360b","height":600,"width":800,"description":"","originalFilePath":"","originalFileName":"photo_8BCDC2F0-8CBA-442C-98F5-0389455C8014.jpg"},{"id":"http:\/\/img.ksl.com\/mx\/mplace-classifieds.ksl.com\/1159545-1498418415-54882.jpg","uploadTime":{"sec":1498418418,"usec":462000},"extension":"jpg","md5":"34296cda28b212a6c5590f233a2dca09","height":600,"width":800,"description":"","originalFilePath":"","originalFileName":"photo_726D1636-E3A9-4515-9B95-55161FAAF730.jpg"},{"id":"http:\/\
/img.ksl.com\/mx\/mplace-classifieds.ksl.com\/1159545-1498418421-389128.jpg","uploadTime":{"sec":1498418424,"usec":518000},"extension":"jpg","md5":"265087f19c17a99561a817f02a097b21","height":600,"width":800,"description":"","originalFilePath":"","originalFileName":"photo_09B01A71-46F2-4D8F-9153-CE0F0017495A.jpg"}],
standardFeaturedDates: [],
favorited: 1,
pageViews: 68 };
window.detailPage.sellerData = {
sellerId: 1159545,
sellerAccountAge: "Nov 2010",
moreListingsFromSeller: [{"id":44782211,"displayTime":1500694907,"price":100,"title":"Moto Gear 3 Helmets and Alpine Star Tech 6 Boots S","photo":"http:\/\/img.ksl.com\/mx\/mplace-classifieds.ksl.com\/1159545-1498417151-456217.jpg"},{"id":44782400,"displayTime":1500694904,"price":30,"title":"Belts Pouch, Canteen Holsters For 2 Canteens","photo":"http:\/\/img.ksl.com\/mx\/mplace-classifieds.ksl.com\/1159545-1498418072-282620.jpg"}] };
window.detailPage.userData = {
testUser: Boolean(0)
};
</script>
我该如何提取我想要的那一部分?
感谢您阅读我的问题!
哇。这不是HTML。这是json。你应该使用JSON解析器。 –
此外,您还没有指定您想要哪种语言的解决方案。 –
可能是用户 @AlexR 的马甲帐户,该用户在一小时前发布了[此问题](https://stackoverflow.com/questions/45257932)。 –