Update to D2
This commit is contained in:
parent
15decb678f
commit
6f29dd4a2b
|
@ -0,0 +1 @@
|
|||
*.exe
|
|
@ -0,0 +1,3 @@
|
|||
[submodule "ae"]
|
||||
path = ae
|
||||
url = git://github.com/CyberShadow/ae.git
|
9
README
9
README
|
@ -1,9 +0,0 @@
|
|||
This project is written to use the Phobos standard library for D,
|
||||
so it must be compiled with a compiler which supports Phobos, such
|
||||
as GDC.
|
||||
|
||||
To compile it using GDC:
|
||||
|
||||
gdc *.d -o wp2git
|
||||
|
||||
which generates an executable called wp2git.
|
|
@ -0,0 +1 @@
|
|||
Subproject commit 196247891e84b6b30b54089d4a64163fad6807c3
|
355
litexml.d
355
litexml.d
|
@ -1,355 +0,0 @@
|
|||
// Written in the D Programming Language, version 1
|
||||
|
||||
/// Light read-only XML library
|
||||
module litexml;
|
||||
|
||||
import std.stream;
|
||||
import std.string;
|
||||
import std.ctype;
|
||||
|
||||
alias std.string.iswhite iswhite;
|
||||
alias std.string.tolower tolower;
|
||||
|
||||
/// Kind of an XmlNode, as assigned by the XmlNode / XmlDocument constructors.
enum XmlNodeType
{
    /// Synthetic top-level node created by XmlDocument.
    Root = 0,
    /// Regular element, e.g. <tag attr="value">...</tag> or <tag/>.
    Node = 1,
    // Comment,   (comments are skipped by the parser and never stored)
    /// Declaration of the form <?name attr="value"?>.
    Meta = 2,
    /// Character data found between tags.
    Text = 3
}
|
||||
|
||||
/// A node of a parsed XML tree.
///
/// Depending on `type`, a node is an element (Node), a `<?...?>` declaration
/// (Meta), a run of character data (Text), or the synthetic document root
/// (Root, created by XmlDocument).
class XmlNode
{
    /// Element or declaration name. For Text nodes this holds the raw text
    /// exactly as read from the stream (entities are NOT decoded here).
    string tag;
    /// Attribute values keyed by attribute name (raw, entities not decoded).
    string[string] attributes;
    /// Child nodes in document order. Only filled for Node and Root.
    XmlNode[] children;
    /// Kind of this node.
    XmlNodeType type;

    /// Parses exactly one node (including all of its descendants) from `s`,
    /// starting at the current stream position.
    ///
    /// Leading whitespace and `<!-- ... -->` comments are skipped. On return
    /// the stream is positioned just after the node; for a Text node that is
    /// on the '<' which terminated the text.
    ///
    /// Throws: Exception on malformed input (stray close tag, mismatched
    /// close tag, invalid tag or attribute). Errors raised while parsing a
    /// child are re-thrown wrapped with the name of the enclosing tag.
    /// Stream read errors (e.g. reading past the end) propagate from `s`.
    this(Stream s)
    {
    again:
        char c;
        // Skip leading whitespace; c ends up holding the first significant character.
        do
            s.read(c);
        while(iswhite(c));

        if(c!='<')  // text node
        {
            type = XmlNodeType.Text;
            while(c!='<')
            {
                // TODO: check for EOF
                tag ~= c;
                s.read(c);
            }
            s.seekCur(-1); // rewind to '<'
            //tag = tag.strip();
        }
        else
        {
            s.read(c);
            if(c=='!')  // comment
            {
                expect(s, '-');
                expect(s, '-');
                // Slide a three-character window over the input until it reads "-->".
                char c1, c2, c3;
                do
                {
                    c1 = c2; c2 = c3; s.read(c3);
                } while (!(c1=='-' && c2=='-' && c3=='>'));
                // Comments produce no node: restart and parse whatever follows.
                goto again;
            }
            else
            if(c=='?')  // <?name attr="value" ... ?>
            {
                type = XmlNodeType.Meta;
                tag=readWord(s);
                if(tag.length==0) throw new Exception("Invalid tag");
                while(true)
                {
                    skipWhitespace(s);
                    if(peek(s)=='?')
                        break;
                    readAttribute(s);
                }
                s.read(c);      // consume the '?' that was only peeked at
                expect(s, '>');
            }
            else
            if(c=='/')
                throw new Exception("Unexpected close tag");
            else
            {
                type = XmlNodeType.Node;
                // c is already the first character of the tag name.
                tag = c~readWord(s);
                while(true)
                {
                    skipWhitespace(s);
                    c = peek(s);
                    if(c=='>' || c=='/')
                        break;
                    readAttribute(s);
                }
                s.read(c);      // consume the peeked '>' or '/'
                if(c=='>')
                {
                    // Open tag: read children until the matching "</" is next.
                    while(true)
                    {
                        skipWhitespace(s);
                        if(peek(s)=='<' && peek(s, 2)=='/')
                            break;
                        try
                            children ~= new XmlNode(s);
                        catch(Object e)
                            throw new Exception("Error while processing child of "~tag~":\n"~e.toString);
                    }
                    // The close tag must repeat the open tag's name exactly.
                    expect(s, '<');
                    expect(s, '/');
                    foreach(tc;tag)
                        expect(s, tc);
                    expect(s, '>');
                }
                else
                    expect(s, '>');     // self-closing element: "/>"
            }
        }
    }

    /// Renders the node back to text.
    ///
    /// Text nodes yield their entity-decoded text wrapped in double quotes.
    /// Node and Root yield `<tag attrs>children</tag>`, with attribute values
    /// emitted as stored. Any other type (Meta) yields null.
    string toString()
    {
        switch(type)
        {
            case XmlNodeType.Text:
                // TODO: compact whitespace
                return '"' ~ convertEntities(tag) ~ '"';
            case XmlNodeType.Node:
            case XmlNodeType.Root:
                string attrText;
                foreach(key,value;attributes)
                    attrText ~= ' ' ~ key ~ `="` ~ value ~ '"';
                string childrenText;
                foreach(child;children)
                    childrenText ~= child.toString();
                return '<' ~ tag ~ attrText ~ '>' ~ childrenText ~ "</" ~ tag ~ '>';
            default:
                return null;
        }
    }

    /// Returns the entity-decoded text content of this node.
    ///
    /// For Text nodes this is the node's own text; for Node and Root it is
    /// the concatenation of text() of all children. Other types yield null.
    string text()
    {
        switch(type)
        {
            case XmlNodeType.Text:
                return convertEntities(tag);
            case XmlNodeType.Node:
            case XmlNodeType.Root:
                string childrenText;
                foreach(child;children)
                    childrenText ~= child.text();
                return childrenText;
            default:
                return null;
        }
    }

    /// Returns the first direct child whose `tag` equals the argument,
    /// or null when there is none.
    final XmlNode findChild(string tag)
    {
        foreach(child;children)
            if(child.tag == tag)
                return child;
        return null;
    }

    /// node["name"]: same as findChild, so it returns null when absent.
    final XmlNode opIndex(string tag)
    {
        return findChild(tag);
    }

    /// node[i]: the i-th direct child.
    final XmlNode opIndex(int index)
    {
        return children[index];
    }

    /// foreach support: iterates over the direct children in order.
    int opApply(int delegate(ref XmlNode) dg)
    {
        int result = 0;

        for (int i = 0; i < children.length; i++)
        {
            result = dg(children[i]);
            if (result)     // non-zero means the foreach body exited early
                break;
        }
        return result;
    }

private:
    /// Reads one `name = "value"` (or `name = 'value'`) pair from `s` and
    /// stores it in `attributes`. The value is everything up to the next
    /// occurrence of the opening delimiter, stored without decoding.
    ///
    /// Throws: Exception if no name is present, '=' is missing, or the value
    /// is not quoted.
    final void readAttribute(Stream s)
    {
        string name = readWord(s);
        if(name.length==0) throw new Exception("Invalid attribute");
        skipWhitespace(s);
        expect(s, '=');
        skipWhitespace(s);
        char delim;
        s.read(delim);
        if(delim != '\'' && delim != '"')
            throw new Exception("Expected ' or \"");   // was \' twice: the message never named the double quote
        string value;
        while(true)
        {
            char c;
            s.read(c);
            if(c==delim) break;
            value ~= c;
        }
        attributes[name]=value;
    }

    /// Creates an empty node without parsing anything; XmlDocument relies on
    /// this to fill in its own fields.
    this()
    {
    }
}
|
||||
|
||||
/// Root of a parsed document: a Root-typed node tagged "<Root>" whose
/// children are the top-level nodes read from the stream.
class XmlDocument : XmlNode
{
    /// Reads top-level nodes from `s` until the end of the stream is reached.
    /// Parsing is best-effort: the first error raised while reading a node
    /// silently ends the loop, keeping the nodes collected so far.
    this(Stream s)
    {
        tag = "<Root>";
        type = XmlNodeType.Root;

        skipWhitespace(s);
        for (;;)
        {
            if (s.position >= s.size)
                break;

            try
            {
                children ~= new XmlNode(s);
                skipWhitespace(s);
            }
            catch (Object o)
            {
                // Anything thrown here ends parsing without reporting it.
                break;
            }
        }
    }
}
|
||||
|
||||
private:
|
||||
|
||||
/// Returns the n-th upcoming character of `s` (1 = the very next one)
/// without consuming anything: n characters are read, then the stream is
/// moved back by n.
char peek(Stream s, int n=1)
{
    char last;
    int remaining = n;
    while (remaining > 0)
    {
        s.read(last);
        remaining--;
    }
    s.seekCur(-n);      // restore the position we started from
    return last;
}
|
||||
|
||||
/// Advances `s` past any run of whitespace. Stops either at the end of the
/// stream or positioned on the first non-whitespace character.
void skipWhitespace(Stream s)
{
    while (s.position != s.size)
    {
        char ch;
        s.read(ch);
        if (!iswhite(ch))
        {
            // Read one character too many: put it back.
            s.seekCur(-1);
            break;
        }
    }
}
|
||||
|
||||
/// True when `c` may appear in a tag or attribute name as accepted by
/// readWord: '-', '_', ':' or anything isalnum() accepts.
bool isWord(char c)
{
    switch (c)
    {
        case '-', '_', ':':
            return true;
        default:
            return isalnum(c) != 0;
    }
}
|
||||
|
||||
/// Reads the longest run of isWord() characters at the current position of
/// `s` and returns it (possibly empty). The stream is left positioned on the
/// first character that is not part of the word.
string readWord(Stream s)
{
    string word;
    char ch;
    for (s.read(ch); isWord(ch); s.read(ch))
        word ~= ch;
    // The terminating character was consumed by the last read: un-read it.
    s.seekCur(-1);
    return word;
}
|
||||
|
||||
/// Consumes one character from `s` and verifies that it equals `c`.
///
/// Throws: Exception("Expected <c>, got <actual>") on mismatch; the
/// mismatching character stays consumed.
void expect(Stream s, char c)
{
    char actual;
    s.read(actual);
    if (actual == c)
        return;
    throw new Exception("Expected " ~ c ~ ", got " ~ actual);
}
|
||||
|
||||
/// Replacement text for the named character entities that convertEntities
/// understands, keyed by entity name without the surrounding '&' and ';'.
const string[string] entities;

/// Fills `entities` at module start-up.
static this()
{
    // Each value is spelled as a named-character-entity escape inside a
    // string literal, so the table stays pure ASCII and the intended
    // character is unambiguous (several of them are distinct kinds of
    // whitespace that look identical when written literally).
    // The [] on the first pair makes the literal's key/value types dynamic
    // arrays rather than fixed-size ones.
    entities =
    [
        "quot"[]  : "\&quot;"[],
        "amp"     : "\&amp;",
        "lt"      : "\&lt;",
        "gt"      : "\&gt;",
        "circ"    : "\&circ;",
        "tilde"   : "\&tilde;",
        "nbsp"    : "\&nbsp;",
        "ensp"    : "\&ensp;",
        "emsp"    : "\&emsp;",
        "thinsp"  : "\&thinsp;",
        "ndash"   : "\&ndash;",
        "mdash"   : "\&mdash;",
        "lsquo"   : "\&lsquo;",
        "rsquo"   : "\&rsquo;",
    //  "sbquo"   : "\&sbquo;",
        "ldquo"   : "\&ldquo;",
        "rdquo"   : "\&rdquo;",
    //  "bdquo"   : "\&bdquo;",
        "dagger"  : "\&dagger;",
        "Dagger"  : "\&Dagger;",
        "permil"  : "\&permil;"
    //  "lsaquo"  : "\&lsaquo;",
    //  "rsaquo"  : "\&rsaquo;",
    //  "euro"    : "\&euro;"
    ];
}
|
||||
|
||||
import std.utf;
|
||||
import std.c.stdio;
|
||||
|
||||
/// Parses the digits of a numeric character reference using the given
/// sscanf conversion ("%x" or "%d").
///
/// Returns: the referenced code point, or 0 when the digits cannot be parsed
/// or do not denote a valid Unicode code point.
private dchar parseCharRef(string digits, string format)
{
    // Must be zeroed explicitly: a dchar defaults to 0xFFFF, which a failed
    // sscanf would otherwise leave behind looking like a parsed value.
    dchar c = 0;
    if (sscanf(toStringz(digits), toStringz(format), &c) != 1)
        return 0;
    return isValidDchar(c) ? c : 0;
}

/// Decodes XML/HTML character references in `source`.
///
/// Handles hexadecimal (`&#xHH;`) and decimal (`&#DD;`) references as well
/// as the names listed in `entities`. An '&' is paired with the first ';'
/// that follows it; if the text in between is not a recognized reference
/// (unknown name, unparsable digits, code point 0 or an invalid code point),
/// it is left in the output unchanged. Decoded text is not scanned again, so
/// "&amp;lt;" becomes "&lt;", not "<".
///
/// Returns: the decoded string, UTF-8 encoded.
public string convertEntities(string source)
{
    // Work on UTF-32 so that indices address whole characters.
    dstring str = toUTF32(source);
    for (size_t i = 0; i < str.length; i++)
    {
        if (str[i] != '&')
            continue;

        for (size_t j = i + 1; j < str.length; j++)
        {
            if (str[j] != ';')
                continue;

            string entity = toUTF8(str[i+1..j]);
            if (entity.length > 0)
            {
                if (entity[0] == '#')
                {
                    dchar c = (entity.length > 1 && entity[1] == 'x')
                        ? parseCharRef(entity[2..$], "%x")
                        : parseCharRef(entity[1..$], "%d");
                    if (c)
                        str = str[0..i] ~ c ~ str[j+1..$];
                }
                else if (entity in entities)
                    str = str[0..i] ~ toUTF32(entities[entity]) ~ str[j+1..$];
            }
            // Only the first ';' after this '&' is considered.
            break;
        }
    }
    return toUTF8(str);
}
|
35
wp2git.d
35
wp2git.d
|
@ -1,4 +1,4 @@
|
|||
// Written in the D Programming Language, version 1
|
||||
// Written in the D Programming Language (version 2)
|
||||
|
||||
import std.stdio;
|
||||
import std.process;
|
||||
|
@ -7,7 +7,8 @@ import std.string;
|
|||
import std.file;
|
||||
import std.conv;
|
||||
import std.uri;
|
||||
import litexml;
|
||||
|
||||
import ae.utils.xmllite;
|
||||
|
||||
int main(string[] args)
|
||||
{
|
||||
|
@ -37,12 +38,12 @@ int main(string[] args)
|
|||
|
||||
if (args.length == 1 || usage)
|
||||
{
|
||||
fwritefln(stderr, "Usage: %s Article_name [OPTION]...", args[0]);
|
||||
fwritefln(stderr, "Create a git repository with the history of the specified Wikipedia article.");
|
||||
fwritefln(stderr, "Supported options:");
|
||||
fwritefln(stderr, " -h --help Display this help");
|
||||
fwritefln(stderr, " --no-import Don't invoke ``git fast-import'' and only generate the fast-import data");
|
||||
fwritefln(stderr, " --language LANG Specify the Wikipedia language subdomain (default: en)");
|
||||
stderr.writefln("Usage: %s Article_name [OPTION]...", args[0]);
|
||||
stderr.writefln("Create a git repository with the history of the specified Wikipedia article.");
|
||||
stderr.writefln("Supported options:");
|
||||
stderr.writefln(" -h --help Display this help");
|
||||
stderr.writefln(" --no-import Don't invoke ``git fast-import'' and only generate the fast-import data");
|
||||
stderr.writefln(" --language LANG Specify the Wikipedia language subdomain (default: en)");
|
||||
return 2;
|
||||
}
|
||||
|
||||
|
@ -55,10 +56,10 @@ int main(string[] args)
|
|||
if (spawnvp(P_WAIT, "curl", ["curl", "-d", "\"\"", "http://" ~ language ~ ".wikipedia.org/w/index.php?title=Special:Export&pages=" ~ encodeComponent(name), "-o", "history.xml"]))
|
||||
throw new Exception("curl error");
|
||||
|
||||
fwritefln(stderr, "Loading history...");
|
||||
stderr.writefln("Loading history...");
|
||||
string xmldata = cast(string) read("history.xml");
|
||||
std.file.remove("history.xml");
|
||||
auto xml = new XmlDocument(new MemoryStream(xmldata));
|
||||
auto xml = new XmlDocument(xmldata);
|
||||
|
||||
string data = "reset refs/heads/master\n";
|
||||
auto page = xml[0]["page"];
|
||||
|
@ -68,23 +69,23 @@ int main(string[] args)
|
|||
if (child.tag=="revision")
|
||||
{
|
||||
string id = child["id"].text;
|
||||
string summary = child["comment"] ? child["comment"].text : null;
|
||||
string committer = child["contributor"]["username"] ? child["contributor"]["username"].text : child["contributor"]["ip"].text;
|
||||
string summary = child.findChild("comment") ? child["comment"].text : null;
|
||||
string committer = child["contributor"].findChild("username") ? child["contributor"]["username"].text : child["contributor"]["ip"].text;
|
||||
string text = child["text"].text;
|
||||
fwritefln(stderr, "Revision %s by %s: %s", id, committer, summary);
|
||||
stderr.writefln("Revision %s by %s: %s", id, committer, summary);
|
||||
|
||||
summary ~= "\n\nhttp://" ~ language ~ ".wikipedia.org/w/index.php?oldid=" ~ id;
|
||||
data ~=
|
||||
"commit refs/heads/master\n" ~
|
||||
"committer " ~ committer ~ " <" ~ committer ~ "@" ~ language ~ ".wikipedia.org> " ~ ISO8601toRFC2822(child["timestamp"].text) ~ "\n" ~
|
||||
"data " ~ .toString(summary.length) ~ "\n" ~
|
||||
"data " ~ to!string(summary.length) ~ "\n" ~
|
||||
summary ~ "\n" ~
|
||||
"M 644 inline " ~ name ~ ".txt\n" ~
|
||||
"data " ~ .toString(text.length) ~ "\n" ~
|
||||
"data " ~ to!string(text.length) ~ "\n" ~
|
||||
text ~ "\n" ~
|
||||
"\n";
|
||||
}
|
||||
write("fast-import-data", data);
|
||||
std.file.write("fast-import-data", data);
|
||||
|
||||
if (noImport)
|
||||
return 0;
|
||||
|
@ -106,5 +107,5 @@ string ISO8601toRFC2822(string s)
|
|||
|
||||
// 2010-06-15T19:28:44Z
|
||||
// Feb 6 11:22:18 2007 -0500
|
||||
return monthNames[.toInt(s[5..7])-1] ~ " " ~ s[8..10] ~ " " ~ s[11..13] ~ ":" ~ s[14..16] ~ ":" ~ s[17..19] ~ " " ~ s[0..4] ~ " +0000";
|
||||
return monthNames[.to!int(s[5..7])-1] ~ " " ~ s[8..10] ~ " " ~ s[11..13] ~ ":" ~ s[14..16] ~ ":" ~ s[17..19] ~ " " ~ s[0..4] ~ " +0000";
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue