Whitespace

This commit is contained in:
Vladimir Panteleev 2013-08-29 22:02:55 +00:00
parent 62071473a3
commit 687bfafc53
4 changed files with 118 additions and 118 deletions

2
.gitignore vendored
View File

@ -1 +1 @@
*.exe *.exe

View File

@ -1,4 +1,4 @@
This project contains code written by: This project contains code written by:
Vladimir Panteleev <vladimir@thecybershadow.net> Vladimir Panteleev <vladimir@thecybershadow.net>
Robin Green <greenrd@greenrd.org> Robin Green <greenrd@greenrd.org>

42
LICENSE
View File

@ -1,21 +1,21 @@
Copyright (c) 2010 Vladimir Panteleev and contributors (see the file AUTHORS Copyright (c) 2010 Vladimir Panteleev and contributors (see the file AUTHORS
for a complete list) for a complete list)
This software is provided 'as-is', without any express or implied This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages warranty. In no event will the authors be held liable for any damages
arising from the use of this software. arising from the use of this software.
Permission is granted to anyone to use this software for any purpose, Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions: freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not 1. The origin of this software must not be misrepresented; you must not
claim that you wrote the original software. If you use this software claim that you wrote the original software. If you use this software
in a product, an acknowledgment in the product documentation would be in a product, an acknowledgment in the product documentation would be
appreciated but is not required. appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be 2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software. misrepresented as being the original software.
3. This notice may not be removed or altered from any source 3. This notice may not be removed or altered from any source
distribution. distribution.

184
wp2git.d
View File

@ -1,92 +1,92 @@
// Written in the D Programming Language (version 2) // Written in the D Programming Language (version 2)
import std.stdio; import std.stdio;
import std.process; import std.process;
import std.stream; import std.stream;
import std.string; import std.string;
import std.file; import std.file;
import std.conv; import std.conv;
import std.uri; import std.uri;
import std.getopt; import std.getopt;
import std.exception; import std.exception;
import ae.utils.xmllite; import ae.utils.xmllite;
/**
 * Entry point: downloads the history of one Wikipedia article and turns it
 * into a git repository in the current directory.
 *
 * Steps:
 *   1. Parse the command line (article name plus options).
 *   2. Fetch the Special:Export XML for the article with curl.
 *   3. Convert every <revision> element into a git fast-import "commit".
 *   4. Unless --no-import was given, create a repository and feed it the
 *      generated stream with "git fast-import".
 *
 * Params:
 *   args = program name followed by the article name and any options
 *
 * Returns: 0 on success, 2 when the usage text was printed.
 *
 * Throws: Exception (via enforce) when more than one article name is given,
 *   curl fails, the page is missing from the export, a git repository
 *   already exists here, or one of the git commands exits with a non-zero
 *   status.
 */
int main(string[] args)
{
	string language="en";
	bool usage, noImport;
	getopt(args,
		"h|help", &usage,
		"no-import", &noImport,
		"language", &language,
	);

	enforce(args.length<=2, "Multiple article name arguments");
	if (args.length == 1 || usage)
	{
		stderr.writefln("Usage: %s Article_name [OPTION]...", args[0]);
		stderr.writefln("Create a git repository with the history of the specified Wikipedia article.");
		stderr.writefln("Supported options:");
		stderr.writefln(" -h --help Display this help");
		stderr.writefln(" --no-import Don't invoke ``git fast-import'' and only generate the fast-import data");
		stderr.writefln(" --language LANG Specify the Wikipedia language subdomain (default: en)");
		return 2;
	}

	enforce(args.length==2, "No article specified");
	auto name = args[1];

	// Download the export into a temporary file, then load and delete it.
	enforce(spawnvp(P_WAIT, "curl", ["curl", "-d", "\"\"", "http://" ~ language ~ ".wikipedia.org/w/index.php?title=Special:Export&pages=" ~ encodeComponent(name), "-o", "history.xml"])==0, "curl error");

	stderr.writefln("Loading history...");
	string xmldata = cast(string) read("history.xml");
	std.file.remove("history.xml");

	auto xml = new XmlDocument(xmldata);

	string data = "reset refs/heads/master\n";

	auto page = xml[0]["page"];
	enforce(page, "No such page");

	foreach (child; page)
	{
		if (child.tag=="revision")
		{
			string id = child["id"].text;
			string summary = child.findChild("comment") ? child["comment"].text : null;

			// A contributor normally carries either <username> or <ip>.
			// Fall back to a placeholder when neither is present instead of
			// failing on the missing <ip> element.
			auto contributor = child["contributor"];
			string committer;
			if (contributor.findChild("username"))
				committer = contributor["username"].text;
			else if (contributor.findChild("ip"))
				committer = contributor["ip"].text;
			else
				committer = "Unknown";

			string text = child["text"].text;
			stderr.writefln("Revision %s by %s: %s", id, committer, summary);

			// Append a permalink to the revision to the commit message.
			summary ~= "\n\nhttp://" ~ language ~ ".wikipedia.org/w/index.php?oldid=" ~ id;

			// .length of a D string counts UTF-8 code units (bytes), which
			// is the unit the fast-import "data" command expects.
			data ~=
				"commit refs/heads/master\n" ~
				"committer " ~ committer ~ " <" ~ committer ~ "@" ~ language ~ ".wikipedia.org> " ~ ISO8601toRFC2822(child["timestamp"].text) ~ "\n" ~
				"data " ~ to!string(summary.length) ~ "\n" ~
				summary ~ "\n" ~
				"M 644 inline " ~ name ~ ".txt\n" ~
				"data " ~ to!string(text.length) ~ "\n" ~
				text ~ "\n" ~
				"\n";
		}
	}

	std.file.write("fast-import-data", data);

	if (noImport)
		return 0;

	enforce(!exists(".git"), "A git repository already exists here!");

	// Runs a shell command and aborts on a non-zero exit status, so that a
	// failed step is reported instead of being silently ignored.
	void run(string command)
	{
		enforce(system(command) == 0, "Command failed: " ~ command);
	}

	run("git init");
	run("git fast-import --date-format=rfc2822 < fast-import-data");
	// Only reached when the import succeeded; on failure the generated data
	// is kept on disk for inspection.
	std.file.remove("fast-import-data");
	run("git reset --hard");

	return 0;
}
/**
 * Converts a timestamp such as "2010-06-15T19:28:44Z" into the
 * "Jun 15 19:28:44 2010 +0000" form passed to
 * "git fast-import --date-format=rfc2822".
 *
 * Fields are taken from fixed character positions; anything after the
 * seconds field is ignored and the zone is always written as +0000.
 *
 * Params:
 *   s = timestamp laid out as YYYY-MM-DDThh:mm:ss (at least 19 characters)
 *
 * Returns: the reformatted date string.
 *
 * Throws: Exception when the input is too short or the month is not a
 *   number in the range 1..12.
 */
string ISO8601toRFC2822(string s)
{
	static immutable string[12] monthNames = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"];

	// 2010-06-15T19:28:44Z
	// Feb 6 11:22:18 2007 -0500

	// Validate before slicing so malformed input raises a catchable
	// Exception rather than an out-of-bounds error.
	enforce(s.length >= 19, "Malformed timestamp: " ~ s);
	immutable month = to!int(s[5..7]);
	enforce(month >= 1 && month <= 12, "Invalid month in timestamp: " ~ s);

	return monthNames[month-1] ~ " " ~ s[8..10] ~ " " ~ s[11..13] ~ ":" ~ s[14..16] ~ ":" ~ s[17..19] ~ " " ~ s[0..4] ~ " +0000";
}