Update to D2

This commit is contained in:
Vladimir Panteleev 2013-08-29 21:25:15 +00:00
parent 15decb678f
commit 6f29dd4a2b
6 changed files with 23 additions and 381 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
*.exe

3
.gitmodules vendored Normal file
View File

@ -0,0 +1,3 @@
[submodule "ae"]
path = ae
url = git://github.com/CyberShadow/ae.git

9
README
View File

@ -1,9 +0,0 @@
This project is written to use the Phobos standard library for D,
so it must be compiled with a compiler which supports Phobos, such
as GDC.
To compile it using GDC:
gdc *.d -o wp2git
which generates an executable called wp2git.

1
ae Submodule

@ -0,0 +1 @@
Subproject commit 196247891e84b6b30b54089d4a64163fad6807c3

355
litexml.d
View File

@ -1,355 +0,0 @@
// Written in the D Programming Language, version 1
/// Light read-only XML library
module litexml;
import std.stream;
import std.string;
import std.ctype;
alias std.string.iswhite iswhite;
alias std.string.tolower tolower;
/// Kind of an XmlNode; stored in XmlNode.type.
enum XmlNodeType
{
    Root,   /// Synthetic top-level node created by XmlDocument
    Node,   /// Element such as <tag attr="value"> ... </tag> or <tag/>
    //Comment,
    Meta,   /// Processing-instruction style node: <?name attr="value"?>
    Text    /// Run of character data between tags
}
/// A node of the read-only XML tree.
///
/// Depending on `type` a node is an element (Node), a <?...?> declaration
/// (Meta), a run of text (Text) or the synthetic document root (Root).
class XmlNode
{
    /// Element / declaration name; for Text nodes, the raw text with
    /// entities still encoded (decoded on demand by text() / toString()).
    string tag;
    /// Attribute name -> attribute value exactly as written in the source.
    string[string] attributes;
    /// Child nodes in document order (only populated for Node and Root).
    XmlNode[] children;
    /// What kind of node this is.
    XmlNodeType type;

    /// Parses exactly one node from the current position of `s`.
    ///
    /// Leading whitespace and `<!-- ... -->` comments are skipped. For an
    /// element, all children up to and including the matching close tag are
    /// consumed. Throws Exception on malformed input; reading past the end of
    /// the stream surfaces whatever `s.read` throws.
    this(Stream s)
    {
    again:
        char c;
        do
            s.read(c);
        while(iswhite(c));

        if(c!='<')  // text node
        {
            type = XmlNodeType.Text;
            while(c!='<')
            {
                // TODO: check for EOF
                tag ~= c;
                s.read(c);
            }
            s.seekCur(-1); // rewind to '<'
            //tag = tag.strip();
        }
        else
        {
            s.read(c);
            if(c=='!')  // comment
            {
                expect(s, '-');
                expect(s, '-');
                // Slide a three-character window until it holds "-->".
                char c1, c2, c3;
                do
                {
                    c1 = c2; c2 = c3; s.read(c3);
                } while (!(c1=='-' && c2=='-' && c3=='>'));
                // Comments produce no node: parse whatever follows instead.
                goto again;
            }
            else
            if(c=='?')
            {
                type = XmlNodeType.Meta;
                tag=readWord(s);
                if(tag.length==0) throw new Exception("Invalid tag");
                while(true)
                {
                    skipWhitespace(s);
                    if(peek(s)=='?')
                        break;
                    readAttribute(s);
                }
                s.read(c);      // consume the '?'
                expect(s, '>');
            }
            else
            if(c=='/')
                throw new Exception("Unexpected close tag");
            else
            {
                type = XmlNodeType.Node;
                // `c` already holds the first character of the tag name.
                tag = c~readWord(s);
                while(true)
                {
                    skipWhitespace(s);
                    c = peek(s);
                    if(c=='>' || c=='/')
                        break;
                    readAttribute(s);
                }
                s.read(c);
                if(c=='>')
                {
                    // Regular element: read children until "</" is next.
                    while(true)
                    {
                        skipWhitespace(s);
                        if(peek(s)=='<' && peek(s, 2)=='/')
                            break;
                        try
                            children ~= new XmlNode(s);
                        catch(Object e)
                            throw new Exception("Error while processing child of "~tag~":\n"~e.toString);
                    }
                    // The close tag must repeat this element's name exactly.
                    expect(s, '<');
                    expect(s, '/');
                    foreach(tc;tag)
                        expect(s, tc);
                    expect(s, '>');
                }
                else
                    expect(s, '>');  // self-closing element: "/>"
            }
        }
    }

    /// Debug-style serialization: Text nodes become their decoded text in
    /// double quotes, Node/Root become `<tag attrs>children</tag>`; any other
    /// node type yields null.
    string toString()
    {
        switch(type)
        {
            case XmlNodeType.Text:
                // TODO: compact whitespace
                return '"' ~ convertEntities(tag) ~ '"';
            case XmlNodeType.Node:
            case XmlNodeType.Root:
                string attrText;
                foreach(key,value;attributes)
                    attrText ~= ' ' ~ key ~ `="` ~ value ~ '"';
                string childrenText;
                foreach(child;children)
                    childrenText ~= child.toString();
                return '<' ~ tag ~ attrText ~ '>' ~ childrenText ~ "</" ~ tag ~ '>';
            default:
                return null;
        }
    }

    /// Returns the entity-decoded text of this node: the node's own text for
    /// Text nodes, the concatenated text of all children for Node/Root, and
    /// null for any other node type.
    string text()
    {
        switch(type)
        {
            case XmlNodeType.Text:
                return convertEntities(tag);
            case XmlNodeType.Node:
            case XmlNodeType.Root:
                string childrenText;
                foreach(child;children)
                    childrenText ~= child.text();
                return childrenText;
            default:
                return null;
        }
    }

    /// Returns the first direct child whose `tag` equals `tag`, or null if
    /// there is none.
    final XmlNode findChild(string tag)
    {
        foreach(child;children)
            if(child.tag == tag)
                return child;
        return null;
    }

    /// `node["name"]` is shorthand for findChild("name") (may return null).
    final XmlNode opIndex(string tag)
    {
        return findChild(tag);
    }

    /// `node[i]` returns the i-th direct child.
    final XmlNode opIndex(int index)
    {
        return children[index];
    }

    /// Enables `foreach(child; node)` over the direct children; stops early
    /// when the loop body breaks (delegate returns non-zero).
    int opApply(int delegate(ref XmlNode) dg)
    {
        int result = 0;
        // size_t index: avoids comparing a signed int with the array length.
        for (size_t i = 0; i < children.length; i++)
        {
            result = dg(children[i]);
            if (result)
                break;
        }
        return result;
    }

private:
    /// Reads one `name = "value"` / `name = 'value'` pair from `s` and stores
    /// it in `attributes`. The value is stored verbatim (no entity decoding).
    /// Throws Exception if the name is empty, '=' is missing, or the value is
    /// not quoted.
    final void readAttribute(Stream s)
    {
        string name = readWord(s);
        if(name.length==0) throw new Exception("Invalid attribute");
        skipWhitespace(s);
        expect(s, '=');
        skipWhitespace(s);
        char delim;
        s.read(delim);
        if(delim != '\'' && delim != '"')
            throw new Exception("Expected ' or \"");
        string value;
        while(true)
        {
            char c;
            s.read(c);
            if(c==delim) break;
            value ~= c;
        }
        attributes[name]=value;
    }

    /// Creates an empty node without parsing anything. XmlDocument's
    /// constructor does not chain to this(Stream), so it presumably relies on
    /// this constructor being available.
    this()
    {
    }
}
/// Root of a parsed document: a Root-type node whose children are the
/// top-level nodes read from the stream.
class XmlDocument : XmlNode
{
    /// Parses top-level nodes from `s` until the stream is exhausted.
    /// Parsing stops silently at the first node that fails to parse; nodes
    /// read before the failure are kept.
    this(Stream s)
    {
        tag = "<Root>";
        type = XmlNodeType.Root;

        skipWhitespace(s);
        while (s.position < s.size)
        {
            try
            {
                children ~= new XmlNode(s);
                skipWhitespace(s);
            }
            catch (Object parseFailure)
            {
                break;
            }
        }
    }
}
private:
/// Returns the n-th upcoming character of `s` (1 = the very next one)
/// and restores the stream position, so nothing is consumed.
char peek(Stream s, int n=1)
{
    char last;
    int remaining = n;
    while (remaining > 0)
    {
        s.read(last);
        remaining--;
    }
    // Undo the reads above.
    s.seekCur(-n);
    return last;
}
/// Advances `s` past any run of whitespace. On return the stream is
/// positioned at the first non-whitespace character, or at the end of
/// the stream if only whitespace remained.
void skipWhitespace(Stream s)
{
    while (s.position != s.size)
    {
        char ch;
        s.read(ch);
        if (!iswhite(ch))
        {
            // Read one character too far: put it back.
            s.seekCur(-1);
            return;
        }
    }
}
/// Returns true if `c` may appear in a tag or attribute name:
/// ASCII letters and digits plus '-', '_', ':' and '.'.
/// ('.' is a legal XML name character, e.g. in `xs.type`.)
bool isWord(char c)
{
    return c=='-' || c=='_' || c==':' || c=='.' || isalnum(c);
}
/// Reads the longest run of name characters (see isWord) from `s` and
/// returns it; the result is empty if the next character is not a name
/// character. The terminating character is left unread.
string readWord(Stream s)
{
    string word;
    while (true)
    {
        char ch;
        s.read(ch);
        if (isWord(ch))
        {
            word ~= ch;
            continue;
        }
        // `ch` ended the word: push it back for the caller.
        s.seekCur(-1);
        return word;
    }
}
/// Consumes one character from `s` and throws an Exception unless it
/// equals `c`.
void expect(Stream s, char c)
{
    char actual;
    s.read(actual);
    if (actual == c)
        return;
    throw new Exception("Expected " ~ c ~ ", got " ~ actual);
}
/// Named character entities understood by convertEntities: maps the entity
/// name (the text between '&' and ';') to its replacement text.
/// Filled in by the module constructor below.
const string[string] entities;
// Module constructor: builds the entity table.
static this()
{
entities =
[
// The values are D named-character-entity escape literals (\&name;).
// NOTE(review): the [] slices on the first pair presumably force the
// literal's key/value types to dynamic arrays - confirm before changing.
"quot"[]: \&quot;[],
"amp" : \&amp; ,
"lt" : \&lt; ,
"gt" : \&gt; ,
"circ" : \&circ; ,
"tilde" : \&tilde; ,
"nbsp" : \&nbsp; ,
"ensp" : \&ensp; ,
"emsp" : \&emsp; ,
"thinsp": \&thinsp;,
"ndash" : \&ndash; ,
"mdash" : \&mdash; ,
"lsquo" : \&lsquo; ,
"rsquo" : \&rsquo; ,
// "sbquo" : \&sbquo; ,
"ldquo" : \&ldquo; ,
"rdquo" : \&rdquo; ,
// "bdquo" : \&bdquo; ,
"dagger": \&dagger;,
"Dagger": \&Dagger;,
"permil": \&permil;
// "lsaquo": \&lsaquo;,
// "rsaquo": \&rsaquo;,
// "euro" : \&euro;
];
}
import std.utf;
import std.c.stdio;
/// Returns `source` with character references decoded.
///
/// Handles decimal (`&#65;`) and hexadecimal (`&#x41;`) numeric references
/// and the named entities listed in `entities`. A reference that cannot be
/// decoded (unknown name, malformed number, zero or invalid code point) is
/// left in the output unchanged. Decoded text is not scanned again, so
/// `&amp;lt;` becomes `&lt;`, not `<`.
public string convertEntities(string source)
{
    // Work on UTF-32 so that indices address whole characters.
    dstring str = toUTF32(source);
    for (size_t i = 0; i < str.length; i++)
    {
        if (str[i] != '&')
            continue;

        // Find the ';' that terminates the reference starting at i.
        for (size_t j = i + 1; j < str.length; j++)
        {
            if (str[j] != ';')
                continue;

            string entity = toUTF8(str[i+1..j]);
            if (entity.length > 0)
            {
                if (entity[0] == '#')
                {
                    // Numeric reference. `c` starts at 0 and the sscanf
                    // result is checked so that a failed conversion can
                    // never inject a garbage character.
                    dchar c = 0;
                    int converted;
                    if (entity.length > 1 && entity[1] == 'x')
                        converted = sscanf(toStringz(entity[2..$]), "%x", &c);
                    else
                        converted = sscanf(toStringz(entity[1..$]), "%d", &c);
                    if (converted == 1 && c && isValidDchar(c))
                        str = str[0..i] ~ c ~ str[j+1..$];
                }
                else
                if (entity in entities)
                    str = str[0..i] ~ toUTF32(entities[entity]) ~ str[j+1..$];
            }
            // Only the first ';' after the '&' is considered.
            break;
        }
    }
    return toUTF8(str);
}

View File

@ -1,4 +1,4 @@
// Written in the D Programming Language, version 1
// Written in the D Programming Language (version 2)
import std.stdio;
import std.process;
@ -7,7 +7,8 @@ import std.string;
import std.file;
import std.conv;
import std.uri;
import litexml;
import ae.utils.xmllite;
int main(string[] args)
{
@ -37,12 +38,12 @@ int main(string[] args)
if (args.length == 1 || usage)
{
fwritefln(stderr, "Usage: %s Article_name [OPTION]...", args[0]);
fwritefln(stderr, "Create a git repository with the history of the specified Wikipedia article.");
fwritefln(stderr, "Supported options:");
fwritefln(stderr, " -h --help Display this help");
fwritefln(stderr, " --no-import Don't invoke ``git fast-import'' and only generate the fast-import data");
fwritefln(stderr, " --language LANG Specify the Wikipedia language subdomain (default: en)");
stderr.writefln("Usage: %s Article_name [OPTION]...", args[0]);
stderr.writefln("Create a git repository with the history of the specified Wikipedia article.");
stderr.writefln("Supported options:");
stderr.writefln(" -h --help Display this help");
stderr.writefln(" --no-import Don't invoke ``git fast-import'' and only generate the fast-import data");
stderr.writefln(" --language LANG Specify the Wikipedia language subdomain (default: en)");
return 2;
}
@ -55,10 +56,10 @@ int main(string[] args)
if (spawnvp(P_WAIT, "curl", ["curl", "-d", "\"\"", "http://" ~ language ~ ".wikipedia.org/w/index.php?title=Special:Export&pages=" ~ encodeComponent(name), "-o", "history.xml"]))
throw new Exception("curl error");
fwritefln(stderr, "Loading history...");
stderr.writefln("Loading history...");
string xmldata = cast(string) read("history.xml");
std.file.remove("history.xml");
auto xml = new XmlDocument(new MemoryStream(xmldata));
auto xml = new XmlDocument(xmldata);
string data = "reset refs/heads/master\n";
auto page = xml[0]["page"];
@ -68,23 +69,23 @@ int main(string[] args)
if (child.tag=="revision")
{
string id = child["id"].text;
string summary = child["comment"] ? child["comment"].text : null;
string committer = child["contributor"]["username"] ? child["contributor"]["username"].text : child["contributor"]["ip"].text;
string summary = child.findChild("comment") ? child["comment"].text : null;
string committer = child["contributor"].findChild("username") ? child["contributor"]["username"].text : child["contributor"]["ip"].text;
string text = child["text"].text;
fwritefln(stderr, "Revision %s by %s: %s", id, committer, summary);
stderr.writefln("Revision %s by %s: %s", id, committer, summary);
summary ~= "\n\nhttp://" ~ language ~ ".wikipedia.org/w/index.php?oldid=" ~ id;
data ~=
"commit refs/heads/master\n" ~
"committer " ~ committer ~ " <" ~ committer ~ "@" ~ language ~ ".wikipedia.org> " ~ ISO8601toRFC2822(child["timestamp"].text) ~ "\n" ~
"data " ~ .toString(summary.length) ~ "\n" ~
"data " ~ to!string(summary.length) ~ "\n" ~
summary ~ "\n" ~
"M 644 inline " ~ name ~ ".txt\n" ~
"data " ~ .toString(text.length) ~ "\n" ~
"data " ~ to!string(text.length) ~ "\n" ~
text ~ "\n" ~
"\n";
}
write("fast-import-data", data);
std.file.write("fast-import-data", data);
if (noImport)
return 0;
@ -106,5 +107,5 @@ string ISO8601toRFC2822(string s)
// 2010-06-15T19:28:44Z
// Feb 6 11:22:18 2007 -0500
return monthNames[.toInt(s[5..7])-1] ~ " " ~ s[8..10] ~ " " ~ s[11..13] ~ ":" ~ s[14..16] ~ ":" ~ s[17..19] ~ " " ~ s[0..4] ~ " +0000";
return monthNames[.to!int(s[5..7])-1] ~ " " ~ s[8..10] ~ " " ~ s[11..13] ~ ":" ~ s[14..16] ~ ":" ~ s[17..19] ~ " " ~ s[0..4] ~ " +0000";
}