wp2git/wp2git.d

93 lines
3.1 KiB
D
Raw Normal View History

2013-08-29 16:25:15 -05:00
// Written in the D Programming Language (version 2)
2010-09-12 15:44:26 -05:00
2010-06-15 16:17:10 -05:00
import std.stdio;
import std.process;
import std.stream;
import std.string;
import std.file;
2010-06-15 18:09:41 -05:00
import std.conv;
import std.uri;
2013-08-29 16:30:28 -05:00
import std.getopt;
import std.exception;
2013-08-29 16:25:15 -05:00
import ae.utils.xmllite;
2010-06-15 16:17:10 -05:00
/**
 * Downloads the export XML of one Wikipedia article and turns every
 * <revision> element in it into a commit of a new git repository in the
 * current directory.
 *
 * Command line: Article_name [-h|--help] [--no-import] [--language LANG]
 *
 * Working files "history.xml" and "fast-import-data" are created in the
 * current directory. "history.xml" is removed right after it has been read;
 * "fast-import-data" is kept when --no-import is given (and also when
 * git fast-import fails, so that it can be inspected).
 *
 * Returns: 2 after printing the usage text, 0 on success.
 * Throws: every failed check (bad arguments, curl or git exiting with a
 * non-zero status, an already existing ".git") is reported via enforce.
 */
int main(string[] args)
{
	string language = "en";
	bool usage, noImport;

	// getopt consumes the options it recognises, so afterwards args holds
	// only the program name plus the positional arguments.
	getopt(args,
		"h|help", &usage,
		"no-import", &noImport,
		"language", &language,
	);
	enforce(args.length<=2, "Multiple article name arguments");
	if (args.length == 1 || usage)
	{
		stderr.writefln("Usage: %s Article_name [OPTION]...", args[0]);
		stderr.writefln("Create a git repository with the history of the specified Wikipedia article.");
		stderr.writefln("Supported options:");
		stderr.writefln(" -h --help Display this help");
		stderr.writefln(" --no-import Don't invoke ``git fast-import'' and only generate the fast-import data");
		stderr.writefln(" --language LANG Specify the Wikipedia language subdomain (default: en)");
		return 2;
	}
	enforce(args.length==2, "No article specified");
	auto name = args[1];

	// Fetch the article through Special:Export into history.xml.
	enforce(spawnvp(P_WAIT, "curl", ["curl", "-d", "\"\"", "http://" ~ language ~ ".wikipedia.org/w/index.php?title=Special:Export&pages=" ~ encodeComponent(name), "-o", "history.xml"])==0, "curl error");

	stderr.writefln("Loading history...");
	string xmldata = cast(string) read("history.xml");
	std.file.remove("history.xml");
	auto xml = new XmlDocument(xmldata);

	// Build the whole fast-import stream in memory, one commit per revision.
	string data = "reset refs/heads/master\n";
	auto page = xml[0]["page"];
	enforce(page, "No such page");
	foreach (child; page)
		if (child.tag=="revision")
		{
			string id = child["id"].text;
			// The edit summary is optional.
			string summary = child.findChild("comment") ? child["comment"].text : null;
			// Use the username when present, otherwise the "ip" element.
			string committer = child["contributor"].findChild("username") ? child["contributor"]["username"].text : child["contributor"]["ip"].text;
			string text = child["text"].text;
			stderr.writefln("Revision %s by %s: %s", id, committer, summary);

			// Append a link to this exact revision to the commit message.
			summary ~= "\n\nhttp://" ~ language ~ ".wikipedia.org/w/index.php?oldid=" ~ id;

			// .length of a D string is its size in bytes, which is the unit
			// the "data <count>" command of git fast-import expects.
			data ~=
				"commit refs/heads/master\n" ~
				"committer " ~ committer ~ " <" ~ committer ~ "@" ~ language ~ ".wikipedia.org> " ~ ISO8601toRFC2822(child["timestamp"].text) ~ "\n" ~
				"data " ~ to!string(summary.length) ~ "\n" ~
				summary ~ "\n" ~
				"M 644 inline " ~ name ~ ".txt\n" ~
				"data " ~ to!string(text.length) ~ "\n" ~
				text ~ "\n" ~
				"\n";
		}
	std.file.write("fast-import-data", data);

	if (noImport)
		return 0;

	enforce(!exists(".git"), "A git repository already exists here!");

	// Check every git exit status: previously a failed import was followed
	// by deleting the generated data, a hard reset and a success exit code.
	enforce(system("git init")==0, "git init error");
	enforce(system("git fast-import --date-format=rfc2822 < fast-import-data")==0, "git fast-import error");
	std.file.remove("fast-import-data");
	enforce(system("git reset --hard")==0, "git reset error");

	return 0;
}
2010-06-15 18:09:41 -05:00
/**
 * Re-formats a timestamp such as "2010-06-15T19:28:44Z" as
 * "Jun 15 19:28:44 2010 +0000", the date form main() feeds to
 * `git fast-import --date-format=rfc2822`.
 *
 * Only fixed character positions are read; the zone designator of the input
 * is not inspected and "+0000" is always appended.
 *
 * Params:
 *     s = timestamp laid out as YYYY-MM-DDThh:mm:ss, optionally followed by
 *         further characters (at least 19 characters in total)
 * Returns: the re-formatted timestamp.
 * Throws: Exception when s is shorter than 19 characters, when the month
 *         field is not a number, or when it is outside 1..12.
 */
string ISO8601toRFC2822(string s)
{
	// Built once instead of allocating a fresh array on every call.
	static immutable string[12] monthNames = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"];

	// 2010-06-15T19:28:44Z
	// Feb 6 11:22:18 2007 -0500

	// The value comes from downloaded XML, so validate it before slicing
	// and indexing rather than failing with a range violation.
	enforce(s.length >= 19, "Malformed timestamp: " ~ s);
	immutable month = .to!int(s[5..7]);
	enforce(month >= 1 && month <= 12, "Invalid month in timestamp: " ~ s);

	return monthNames[month-1] ~ " " ~ s[8..10] ~ " " ~ s[11..13] ~ ":" ~ s[14..16] ~ ":" ~ s[17..19] ~ " " ~ s[0..4] ~ " +0000";
}