diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1feae78 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +*.exe diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..260e111 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "ae"] + path = ae + url = git://github.com/CyberShadow/ae.git diff --git a/README b/README deleted file mode 100644 index 3ac36bf..0000000 --- a/README +++ /dev/null @@ -1,9 +0,0 @@ -This project is written to use the Phobos standard library for D, -so it must be compiled with a compiler which supports Phobos, such -as GDC. - -To compile it using GDC: - -gdc *.d -o wp2git - -which generates an executable called wp2git. diff --git a/ae b/ae new file mode 160000 index 0000000..1962478 --- /dev/null +++ b/ae @@ -0,0 +1 @@ +Subproject commit 196247891e84b6b30b54089d4a64163fad6807c3 diff --git a/litexml.d b/litexml.d deleted file mode 100644 index ffb12c4..0000000 --- a/litexml.d +++ /dev/null @@ -1,355 +0,0 @@ -// Written in the D Programming Language, version 1 - -/// Light read-only XML library -module litexml; - -import std.stream; -import std.string; -import std.ctype; - -alias std.string.iswhite iswhite; -alias std.string.tolower tolower; - -enum XmlNodeType -{ - Root, - Node, - //Comment, - Meta, - Text -} - -class XmlNode -{ - string tag; - string[string] attributes; - XmlNode[] children; - XmlNodeType type; - - this(Stream s) - { - again: - char c; - do - s.read(c); - while(iswhite(c)); - - if(c!='<') // text node - { - type = XmlNodeType.Text; - while(c!='<') - { - // TODO: check for EOF - tag ~= c; - s.read(c); - } - s.seekCur(-1); // rewind to '<' - //tag = tag.strip(); - } - else - { - s.read(c); - if(c=='!') // comment - { - expect(s, '-'); - expect(s, '-'); - char c1, c2, c3; - do - { - c1 = c2; c2 = c3; s.read(c3); - } while (!(c1=='-' && c2=='-' && c3=='>')); - goto again; - } - else - if(c=='?') - { - type = XmlNodeType.Meta; - tag=readWord(s); - if(tag.length==0) throw new Exception("Invalid tag"); - while(true) - { - skipWhitespace(s); - if(peek(s)=='?') - break; - readAttribute(s); - } - s.read(c); - expect(s, '>'); - } - else - if(c=='/') - throw new Exception("Unexpected close tag"); - else - { - type = XmlNodeType.Node; - tag = c~readWord(s); - while(true) - { - skipWhitespace(s); - c = peek(s); - if(c=='>' || c=='/') - break; - readAttribute(s); - } - s.read(c); - if(c=='>') - { - while(true) - { - skipWhitespace(s); - if(peek(s)=='<' && peek(s, 2)=='/') - break; - try - children ~= new XmlNode(s); - catch(Object e) - throw new Exception("Error while processing child of "~tag~":\n"~e.toString); - } - expect(s, '<'); - expect(s, '/'); - foreach(tc;tag) - expect(s, tc); - expect(s, '>'); - } - else - expect(s, '>'); - } - } - } - - string toString() - { - switch(type) - { - case XmlNodeType.Text: - // TODO: compact whitespace - return '"' ~ convertEntities(tag) ~ '"'; - case XmlNodeType.Node: - case XmlNodeType.Root: - string attrText; - foreach(key,value;attributes) - attrText ~= ' ' ~ key ~ `="` ~ value ~ '"'; - string childrenText; - foreach(child;children) - childrenText ~= child.toString(); - return '<' ~ tag ~ attrText ~ '>' ~ childrenText ~ "'; - default: - return null; - } - } - - string text() - { - switch(type) - { - case XmlNodeType.Text: - return convertEntities(tag); - case XmlNodeType.Node: - case XmlNodeType.Root: - string childrenText; - foreach(child;children) - childrenText ~= child.text(); - return childrenText; - default: - return null; - } - } - - final XmlNode findChild(string tag) - { - foreach(child;children) - if(child.tag == tag) - return child; - return null; - } - - final XmlNode opIndex(string tag) - { - return findChild(tag); - } - - final XmlNode opIndex(int index) - { - return children[index]; - } - - int opApply(int delegate(ref XmlNode) dg) - { - int result = 0; - - for (int i = 0; i < children.length; i++) - { - result = dg(children[i]); - if (result) - break; - } - return result; - } - -private: - final void readAttribute(Stream s) - { - string name = readWord(s); - if(name.length==0) throw new Exception("Invalid attribute"); - skipWhitespace(s); - expect(s, '='); - skipWhitespace(s); - char delim; - s.read(delim); - if(delim != '\'' && delim != '"') - throw new Exception("Expected ' or \'"); - string value; - while(true) - { - char c; - s.read(c); - if(c==delim) break; - value ~= c; - } - attributes[name]=value; - } - - this() - { - } -} - -class XmlDocument : XmlNode -{ - this(Stream s) - { - type = XmlNodeType.Root; - tag = ""; - skipWhitespace(s); - while(s.position < s.size) - try - { - children ~= new XmlNode(s); - skipWhitespace(s); - } - catch(Object o) - break; - } -} - -private: - -char peek(Stream s, int n=1) -{ - char c; - for(int i=0;i0) - if(entity[0]=='#') - if(entity.length>1 && entity[1]=='x') - { - dchar c; - sscanf(toStringz(entity[2..$]), "%x", &c); - if(c) - str = str[0..i] ~ c ~ str[j+1..$]; - } - else - { - dchar c; - sscanf(toStringz(entity[1..$]), "%d", &c); - if(c) - str = str[0..i] ~ c ~ str[j+1..$]; - } - else - if(entity in entities) - str = str[0..i] ~ toUTF32(entities[entity]) ~ str[j+1..$]; - break; - } - } - return toUTF8(str); -} diff --git a/wp2git.d b/wp2git.d index 9b55a70..fcf0030 100644 --- a/wp2git.d +++ b/wp2git.d @@ -1,4 +1,4 @@ -// Written in the D Programming Language, version 1 +// Written in the D Programming Language (version 2) import std.stdio; import std.process; @@ -7,7 +7,8 @@ import std.string; import std.file; import std.conv; import std.uri; -import litexml; + +import ae.utils.xmllite; int main(string[] args) { @@ -37,12 +38,12 @@ int main(string[] args) if (args.length == 1 || usage) { - fwritefln(stderr, "Usage: %s Article_name [OPTION]...", args[0]); - fwritefln(stderr, "Create a git repository with the history of the specified Wikipedia article."); - fwritefln(stderr, "Supported options:"); - fwritefln(stderr, " -h --help Display this help"); - fwritefln(stderr, " --no-import Don't invoke ``git fast-import'' and only generate the fast-import data"); - fwritefln(stderr, " --language LANG Specify the Wikipedia language subdomain (default: en)"); + stderr.writefln("Usage: %s Article_name [OPTION]...", args[0]); + stderr.writefln("Create a git repository with the history of the specified Wikipedia article."); + stderr.writefln("Supported options:"); + stderr.writefln(" -h --help Display this help"); + stderr.writefln(" --no-import Don't invoke ``git fast-import'' and only generate the fast-import data"); + stderr.writefln(" --language LANG Specify the Wikipedia language subdomain (default: en)"); return 2; } @@ -55,10 +56,10 @@ int main(string[] args) if (spawnvp(P_WAIT, "curl", ["curl", "-d", "\"\"", "http://" ~ language ~ ".wikipedia.org/w/index.php?title=Special:Export&pages=" ~ encodeComponent(name), "-o", "history.xml"])) throw new Exception("curl error"); - fwritefln(stderr, "Loading history..."); + stderr.writefln("Loading history..."); string xmldata = cast(string) read("history.xml"); std.file.remove("history.xml"); - auto xml = new XmlDocument(new MemoryStream(xmldata)); + auto xml = new XmlDocument(xmldata); string data = "reset refs/heads/master\n"; auto page = xml[0]["page"]; @@ -68,23 +69,23 @@ int main(string[] args) if (child.tag=="revision") { string id = child["id"].text; - string summary = child["comment"] ? child["comment"].text : null; - string committer = child["contributor"]["username"] ? child["contributor"]["username"].text : child["contributor"]["ip"].text; + string summary = child.findChild("comment") ? child["comment"].text : null; + string committer = child["contributor"].findChild("username") ? child["contributor"]["username"].text : child["contributor"]["ip"].text; string text = child["text"].text; - fwritefln(stderr, "Revision %s by %s: %s", id, committer, summary); + stderr.writefln("Revision %s by %s: %s", id, committer, summary); summary ~= "\n\nhttp://" ~ language ~ ".wikipedia.org/w/index.php?oldid=" ~ id; data ~= "commit refs/heads/master\n" ~ "committer " ~ committer ~ " <" ~ committer ~ "@" ~ language ~ ".wikipedia.org> " ~ ISO8601toRFC2822(child["timestamp"].text) ~ "\n" ~ - "data " ~ .toString(summary.length) ~ "\n" ~ + "data " ~ to!string(summary.length) ~ "\n" ~ summary ~ "\n" ~ "M 644 inline " ~ name ~ ".txt\n" ~ - "data " ~ .toString(text.length) ~ "\n" ~ + "data " ~ to!string(text.length) ~ "\n" ~ text ~ "\n" ~ "\n"; } - write("fast-import-data", data); + std.file.write("fast-import-data", data); if (noImport) return 0; @@ -106,5 +107,5 @@ string ISO8601toRFC2822(string s) // 2010-06-15T19:28:44Z // Feb 6 11:22:18 2007 -0500 - return monthNames[.toInt(s[5..7])-1] ~ " " ~ s[8..10] ~ " " ~ s[11..13] ~ ":" ~ s[14..16] ~ ":" ~ s[17..19] ~ " " ~ s[0..4] ~ " +0000"; + return monthNames[.to!int(s[5..7])-1] ~ " " ~ s[8..10] ~ " " ~ s[11..13] ~ ":" ~ s[14..16] ~ ":" ~ s[17..19] ~ " " ~ s[0..4] ~ " +0000"; }