-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathXMLToCSV_old.java
144 lines (124 loc) · 6.15 KB
/
XMLToCSV_old.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import java.io.*;
import javax.xml.parsers.*;
import org.w3c.dom.*;
import java.util.*;
// This Java program reads the XML and converts it into CSV files that follow the relational schema we designed.
// The CSV files use a tab delimiter ("\t") and the end-of-line character "\n".
// The XML is parsed with the DOM API.
/**
 * Converts one XML data file into five tab-delimited files: {@code Items.csv},
 * {@code ItemCategory.csv}, {@code Bids.csv}, {@code Bidder.csv} and {@code Seller.csv}.
 *
 * <p>Every output file starts with a header row, uses {@code "\t"} as the field
 * delimiter and {@code "\n"} as the line terminator. Field values are written
 * verbatim (no quoting or escaping is applied).
 *
 * <p>The whole document is loaded into memory with the DOM API.
 */
public class XMLToCSV_old {

    /**
     * Parses the input XML file and writes all CSV files.
     *
     * <p>Any failure (unreadable input, malformed XML, unwritable output) is
     * reported by printing the stack trace; the method itself does not throw.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try {
            // Load XML document
            // === Change the XML filepath here ===
            File inputFile = new File("ebay-data/items-0.xml");
            DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
            DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
            Document doc = dBuilder.parse(inputFile);
            doc.getDocumentElement().normalize();

            // Parse and write to CSV files
            parseItems(doc);
            parseItemCategory(doc);
            parseBids(doc);
            parseBidder(doc);
            parseSeller(doc);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Writes one row per {@code Item} element to {@code ebay-data-csv/Items.csv}.
     *
     * <p>Each column is read from a direct child of the {@code Item}; a missing
     * child yields an empty field.
     *
     * @param doc the parsed XML document
     * @throws IOException if the output file cannot be created or written
     */
    private static void parseItems(Document doc) throws IOException {
        NodeList items = doc.getElementsByTagName("Item");
        try (BufferedWriter bw = openWriter("ebay-data-csv/Items.csv")) {
            bw.write("ItemID\tName\tCurrently\tBuy_Price\tFirst_Bid\tNumber_of_Bids\tLocation\tCountry\tStarted\tEnds\tDescription\n");
            for (int i = 0; i < items.getLength(); i++) {
                Element item = (Element) items.item(i);
                bw.write(String.format("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n",
                        item.getAttribute("ItemID"),
                        getTagValue("Name", item),
                        getTagValue("Currently", item),
                        getTagValue("Buy_Price", item),
                        getTagValue("First_Bid", item),
                        getTagValue("Number_of_Bids", item),
                        getTagValue("Location", item),
                        getTagValue("Country", item),
                        getTagValue("Started", item),
                        getTagValue("Ends", item),
                        getTagValue("Description", item)));
            }
        }
    }

    /**
     * Writes one row per (item, category) pair to {@code ebay-data-csv/ItemCategory.csv}.
     *
     * @param doc the parsed XML document
     * @throws IOException if the output file cannot be created or written
     */
    private static void parseItemCategory(Document doc) throws IOException {
        NodeList items = doc.getElementsByTagName("Item");
        try (BufferedWriter bw = openWriter("ebay-data-csv/ItemCategory.csv")) {
            bw.write("ItemID\tCategory\n");
            for (int i = 0; i < items.getLength(); i++) {
                Element item = (Element) items.item(i);
                String itemID = item.getAttribute("ItemID");
                NodeList categories = item.getElementsByTagName("Category");
                for (int j = 0; j < categories.getLength(); j++) {
                    String category = categories.item(j).getTextContent();
                    bw.write(String.format("%s\t%s\n", itemID, category));
                }
            }
        }
    }

    /**
     * Writes one row per {@code Bid} element to {@code ebay-data-csv/Bids.csv}.
     *
     * <p>Bids are collected per {@code Item} so that every row carries the ID of
     * the item it belongs to, matching the four-column header. A {@code Bid}
     * that is not nested inside an {@code Item} is therefore not written.
     * A bid without a {@code Bidder} element gets an empty bidder ID.
     *
     * @param doc the parsed XML document
     * @throws IOException if the output file cannot be created or written
     */
    private static void parseBids(Document doc) throws IOException {
        NodeList items = doc.getElementsByTagName("Item");
        try (BufferedWriter bw = openWriter("ebay-data-csv/Bids.csv")) {
            bw.write("ItemID\tBidderID\tTime\tAmount\n");
            for (int i = 0; i < items.getLength(); i++) {
                Element item = (Element) items.item(i);
                String itemID = item.getAttribute("ItemID");
                NodeList bids = item.getElementsByTagName("Bid");
                for (int j = 0; j < bids.getLength(); j++) {
                    Element bid = (Element) bids.item(j);
                    Node bidder = bid.getElementsByTagName("Bidder").item(0);
                    String bidderID = (bidder instanceof Element)
                            ? ((Element) bidder).getAttribute("UserID")
                            : "";
                    String time = getTagValue("Time", bid);
                    String amount = getTagValue("Amount", bid);
                    bw.write(String.format("%s\t%s\t%s\t%s\n", itemID, bidderID, time, amount));
                }
            }
        }
    }

    /**
     * Writes one row per distinct bidder to {@code ebay-data-csv/Bidder.csv}.
     *
     * <p>Bidders are de-duplicated by {@code UserID}; the first occurrence in
     * document order supplies the rating, location and country.
     *
     * @param doc the parsed XML document
     * @throws IOException if the output file cannot be created or written
     */
    private static void parseBidder(Document doc) throws IOException {
        NodeList nodeList = doc.getElementsByTagName("Bidder");
        try (BufferedWriter bw = openWriter("ebay-data-csv/Bidder.csv")) {
            bw.write("UserID\tRating\tLocation\tCountry\n");
            Set<String> bidders = new HashSet<>();
            for (int i = 0; i < nodeList.getLength(); i++) {
                Element element = (Element) nodeList.item(i);
                String userID = element.getAttribute("UserID");
                // Set.add returns false when the ID was already seen.
                if (bidders.add(userID)) {
                    String rating = element.getAttribute("Rating");
                    String location = getTagValue("Location", element);
                    String country = getTagValue("Country", element);
                    bw.write(String.format("%s\t%s\t%s\t%s\n", userID, rating, location, country));
                }
            }
        }
    }

    /**
     * Writes one row per distinct seller to {@code ebay-data-csv/Seller.csv}.
     *
     * <p>Sellers are de-duplicated by {@code UserID}; the first occurrence in
     * document order supplies the rating.
     *
     * @param doc the parsed XML document
     * @throws IOException if the output file cannot be created or written
     */
    private static void parseSeller(Document doc) throws IOException {
        NodeList nodeList = doc.getElementsByTagName("Seller");
        try (BufferedWriter bw = openWriter("ebay-data-csv/Seller.csv")) {
            bw.write("UserID\tRating\n");
            Set<String> sellers = new HashSet<>();
            for (int i = 0; i < nodeList.getLength(); i++) {
                Element element = (Element) nodeList.item(i);
                String userID = element.getAttribute("UserID");
                // Set.add returns false when the ID was already seen.
                if (sellers.add(userID)) {
                    String rating = element.getAttribute("Rating");
                    bw.write(String.format("%s\t%s\n", userID, rating));
                }
            }
        }
    }

    /**
     * Opens a buffered writer for {@code path}, creating the parent directory
     * first if it does not exist yet.
     *
     * @param path the output file path
     * @return a writer the caller must close
     * @throws IOException if the file cannot be opened for writing
     */
    private static BufferedWriter openWriter(String path) throws IOException {
        File parent = new File(path).getParentFile();
        if (parent != null) {
            // Result ignored on purpose: if creation fails, FileWriter reports it below.
            parent.mkdirs();
        }
        return new BufferedWriter(new FileWriter(path));
    }

    /**
     * Returns the text content of the first <em>direct child</em> of
     * {@code element} whose tag name equals {@code tag}.
     *
     * <p>Only direct children are considered. A descendant search would let a
     * nested element shadow the parent's own field, e.g. a {@code Location}
     * inside a {@code Bidder} being returned when the {@code Location} of the
     * enclosing {@code Item} was requested.
     *
     * @param tag     the child tag name to look for
     * @param element the parent element
     * @return the child's text content, or {@code ""} if there is no such child
     */
    private static String getTagValue(String tag, Element element) {
        for (Node child = element.getFirstChild(); child != null; child = child.getNextSibling()) {
            if (child.getNodeType() == Node.ELEMENT_NODE && tag.equals(child.getNodeName())) {
                return child.getTextContent();
            }
        }
        return "";
    }
}