我目前正在研究能够区分两个XML文件的算法。但是,当涉及处理长XML文件时,这种算法变得非常慢。你有什么想法如何优化它?优化Java差异XML算法
package comparison;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.jdom2.Document;
import org.jdom2.Element;
public class XmlDiff {
private ArrayList<String> differences;
private Document oldXml;
private Document newXml;
private Document configuration;
private ArrayList<String> idDefinitions;
public XmlDiff(Document oldXml, Document newXml, Document configuration) {
this.differences = new ArrayList<String>();
this.oldXml = oldXml;
this.newXml = newXml;
this.configuration = configuration;
this.idDefinitions = new ArrayList<String>();
for (int i = 0; i < this.configuration.getRootElement().getChild("Definition_id").getChildren().size(); i++) {
idDefinitions.add(this.configuration.getRootElement().getChild("Definition_id").getChildren().get(i).getAttributeValue("Id_def"));
}
}
public Document getOldXml() {
return this.oldXml;
}
public Document getNewXml() {
return this.newXml;
}
public Document getConfiguration() {
return this.configuration;
}
public ArrayList<String> getDifferences() {
return differences;
}
public ArrayList<String> getIdDefinitions() {
return this.idDefinitions;
}
public boolean diffNodeName(Node oldNode, Node newNode) {
if (oldNode.getNodeName().toLowerCase() == null && newNode.getNodeName().toLowerCase() == null) {
return false;
}
if (oldNode.getNodeName().toLowerCase() == null && newNode.getNodeName().toLowerCase() != null) {
return true;
}
if (oldNode.getNodeName().toLowerCase() != null && newNode.getNodeName().toLowerCase() == null) {
return true;
}
if (!oldNode.getNodeName().toLowerCase().equals(newNode.getNodeName().toLowerCase())) {
return true;
}
return false;
}
public boolean diffNodeAttributes(Node oldNode, Node newNode) {
HashMap<String, String> oldAttributesHashMap = oldNode.buildHashMapFromAttributes();
HashMap<String, String> newAttributesHashMap = newNode.buildHashMapFromAttributes();
Set oldEntrySet = oldAttributesHashMap.entrySet();
Iterator oldIter = oldEntrySet.iterator();
while (oldIter.hasNext()) {
Map.Entry oldMe = (Map.Entry) oldIter.next();
if (newAttributesHashMap.get(oldMe.getKey()) != null && oldAttributesHashMap.get(oldMe.getKey()).toLowerCase().equals(newAttributesHashMap.get(oldMe.getKey()).toLowerCase())) {
oldIter.remove();
oldAttributesHashMap.remove(oldMe.getKey());
newAttributesHashMap.remove(oldMe.getKey());
}
}
return !(oldAttributesHashMap.isEmpty() && newAttributesHashMap.isEmpty());
}
public void diffNodeValue(Node oldNode, String oldPath, Node newNode, String newPath) {
if (oldNode.getValue() == null && newNode.getValue() != null) {
this.getDifferences().add("value modified : " + oldPath + " } " + newPath + " : " + oldNode.getValue() + " changed to " + newNode.getValue());
}
if (oldNode.getValue() != null && newNode.getValue() == null) {
this.getDifferences().add("value modified : " + oldPath + " } " + newPath + " : " + oldNode.getValue() + " changed to " + newNode.getValue());
}
if (!oldNode.getValue().equals(newNode.getValue())) {
this.getDifferences().add("value modified : " + oldPath + " } " + newPath + " : " + oldNode.getValue() + " changed to " + newNode.getValue());
}
}
public void compare(Node oldRoot, String oldPath, Node newRoot, String newPath) {
if (oldRoot.hasChildren() && newRoot.hasChildren()) {
String memoryOldPath = oldPath;
String memoryNewPath = newPath;
HashMap<Integer, Node> oldChildren = oldRoot.buildHashMapFromChildren();
HashMap<Integer, Node> newChildren = newRoot.buildHashMapFromChildren();
Set oldEntrySet = oldChildren.entrySet();
Iterator oldIter = oldEntrySet.iterator();
while (oldIter.hasNext()) {
Map.Entry oldMe = (Map.Entry) oldIter.next();
int actualIndex = 0;
Set newEntrySet = newChildren.entrySet();
Iterator newIter = newEntrySet.iterator();
Node actualOldChild = oldChildren.get(oldMe.getKey());
boolean matched = false;
boolean valueChanged = false;
boolean attributesChanged = false;
while (!matched && newIter.hasNext()) {
Map.Entry newMe = (Map.Entry) newIter.next();
Node actualNewChild = newChildren.get(newMe.getKey());
if (actualOldChild.getNodeName().toLowerCase().equals(actualNewChild.getNodeName().toLowerCase())) {
if (actualOldChild.getHasSimilarSiblings() || actualNewChild.getHasSimilarSiblings()) {
if (actualOldChild.hasAttributes() || actualNewChild.hasAttributes()) {
if (!this.diffNodeAttributes(actualOldChild, actualNewChild)) {
if (actualOldChild.hasChildren() && actualNewChild.hasChildren()) {
int oldResult = this.findIdChild(actualOldChild.getElement().getChildren());
if (oldResult != -1) {
matched = actualOldChild.getElement().getChildren().get(oldResult).getValue().toLowerCase().equals(actualNewChild.getElement().getChildren().get(oldResult).getValue().toLowerCase());
}
else {
matched = true;
}
} else {
matched = true;
}
}
else {
attributesChanged = true;
}
} else {
if (actualOldChild.hasChildren() && actualNewChild.hasChildren()) {
int oldResult = this.findIdChild(actualOldChild.getElement().getChildren());
if (oldResult != -1) {
matched = actualOldChild.getElement().getChildren().get(oldResult).getValue().toLowerCase().equals(actualNewChild.getElement().getChildren().get(oldResult).getValue().toLowerCase());
}
else {
matched = true;
}
}
else {
matched = ((actualOldChild.getValue().toLowerCase() != null && actualNewChild.getValue().toLowerCase() != null&& actualOldChild.getValue().toLowerCase().equals(actualNewChild.getValue().toLowerCase()))|| (actualOldChild.getValue().toLowerCase() == null&& actualNewChild.getValue().toLowerCase() == null));
valueChanged = ((actualOldChild.getValue().toLowerCase() != null && actualNewChild.getValue().toLowerCase() != null && !actualOldChild.getValue().toLowerCase().equals(actualNewChild.getValue().toLowerCase())) || (actualOldChild.getValue().toLowerCase() != null && actualNewChild.getValue().toLowerCase() == null) || (actualOldChild.getValue().toLowerCase() == null && actualNewChild.getValue().toLowerCase() != null));
}
}
}
else {
if (actualOldChild.hasAttributes()) {
if (!this.diffNodeAttributes(actualOldChild, actualNewChild)) {
matched = true;
}
else {
attributesChanged = true;
}
}
else {
matched = true;
}
}
}
actualIndex = (Integer) newMe.getKey();
}
if (matched || valueChanged || attributesChanged) {
oldRoot = actualOldChild;
newRoot = newChildren.get(actualIndex);
oldPath = oldPath + "/" + oldRoot.getNodeName() + "-" + oldMe.getKey();
newPath = newPath + "/" + newRoot.getNodeName() + "-" + actualIndex;
oldIter.remove();
newIter.remove();
oldChildren.remove(oldMe.getKey());
newChildren.remove(actualIndex);
if (matched) {
this.compare(oldRoot, oldPath, newRoot, newPath);
} else if (valueChanged) {
this.differences.add("value modified : " + oldPath + " } " + newPath + " : " + oldRoot.getValue() + " changed to " + newRoot.getValue());
}
else if(attributesChanged) {
this.differences.add("attributes changed : " + oldPath + " } " + newPath);
}
this.compare(oldRoot, oldPath, newRoot, newPath);
oldPath = memoryOldPath;
newPath = memoryNewPath;
}
}
for (int i : oldChildren.keySet()) {
this.getDifferences().add("deleted : " + oldPath + "/" + oldChildren.get(i).getNodeName() + "-" + i + " } " + newPath);
}
for (int j : newChildren.keySet()) {
this.getDifferences().add("added : " + oldPath + " } " + newPath + "/" + newChildren.get(j).getNodeName() + "-" + j);
}
}
else if ((!oldRoot.hasChildren() && newRoot.hasChildren()) || (oldRoot.hasChildren() && !newRoot.hasChildren())) {
this.getDifferences().add("structure modified : " + oldPath + " } " + newPath);
}
else if (!oldRoot.hasChildren() && !newRoot.hasChildren()) {
this.diffNodeValue(oldRoot, oldPath, newRoot, newPath);
}
}
public int findIdChild(List<Element> children) {
int result = -1;
for (int i = 0; i < this.getIdDefinitions().size(); i++) {
String name = this.getIdDefinitions().get(i);
for (int k = 0; k < children.size(); k++) {
if (children.get(k).getName().equals(name)) {
result = k;
break;
}
}
if (result != -1) {
break;
}
}
return result;
}
}
非常感谢您的帮助!
你是否介绍了它?这显示了什么?瓶颈在哪里? – Robert
另外,你是否必须自己写? https://superuser.com/questions/79920/how-can-i-diff-two-xml-files – Robert