| (1) get a group from a string
|
import re
text = "Asian.lst"
# Capture everything that precedes the literal ".lst" suffix.
result = re.search(r"(.*)\.lst", text)
if result:
    # group(1) is the first parenthesised sub-match; group(0) would be the whole match.
    filename = result.group(1)
    print("filename:", filename)
|
import std.stdio;
import std.regex;
// Extract the part of a "*.lst" file name that precedes the extension,
// using capture group 1 of the regular expression.
void main()
{
    string text = "Asian.lst";
    auto pattern = regex(r"(.*)\.lst");
    auto result = text.matchFirst(pattern);
    if (result)
    {
        // Index 1 is the first parenthesised group.
        // `result[1]` is a shorter spelling of `result.captures[1]`.
        string filename = result.captures[1];
        writeln("filename: ", filename);
    }
}
|
Output:
| (2) match a string against a regexp
|
import re
text = "Asian.lst"
# re.search scans the whole string, so the pattern may match anywhere in it.
result = re.search(r"ian", text)
if result:
    print("contains 'ian'")
|
import std.stdio;
import std.regex;
// Report whether the pattern occurs anywhere in the text.
void main()
{
    string text = "Asian.lst";
    auto pattern = regex(r"ian");
    // The result of matchFirst is usable directly as a boolean condition.
    if (text.matchFirst(pattern))
    {
        writeln("contains 'ian'");
    }
}
|
Output:
| (3) find all the occurrences of a substring in a string
|
import re
# Sample markup: two quoted href values and one unquoted.
text = """
<a href="ad1">sdqs</a>
<a href="ad2">sds</a>
<a href=ad3>qs</a>
"""
# Lazily capture each href value; the surrounding double quotes are optional.
href_pattern = re.compile(r'href="?(.*?)"?>')
m = href_pattern.findall(text)
print(m)  # ['ad1', 'ad2', 'ad3']
|
import std.stdio;
import std.regex;
import std.algorithm;
import std.array;
// Sample markup searched by main(): two quoted href values and one unquoted.
const text = `
<a href="ad1">sdqs</a>
<a href="ad2">sds</a>
<a href=ad3>qs</a>
`;
// Return capture group 1 of every match of `re` in `text`, in order of
// appearance. The pattern is expected to contain at least one group.
string[] findAll(const string re, const string text)
{
    auto pattern = regex(re);
    string[] groups;
    foreach (m; text.matchAll(pattern))
    {
        groups ~= m[1];
    }
    return groups;
}
// Print every href value found in the sample markup.
void main()
{
    enum hrefPattern = `href="?(.*?)"?>`;
    auto hrefs = findAll(hrefPattern, text);
    writeln(hrefs); // ["ad1", "ad2", "ad3"]
}
|
Problem with backreferences
The std.regex package in the stdlib is not perfect. Backreferences don't work correctly :( I ran into a problem that has existed since 2015…
On Discord, Paul Backus summarized it as follows: "It's specifically the combination of (1) a backreference, (2) with a .* in front of it, (3) with "extra" characters at the start of the string and between the two parts that are supposed to match."
Links:
Example with a workaround:
import std.regex;
import std.stdio;
// Demonstrates the backreference workaround: a bounded repetition is used
// in place of `.*` in front of the backreference.
void main()
{
    string text = "baacaa";
    // auto pattern = regex(r"(..).*\1"); // buggy
    auto pattern = regex(r"(..).{0,999999}\1"); // workaround
    auto result = text.matchFirst(pattern);
    if (result)
    {
        writeln(text);
    }
}
The workaround was posted by pbackus (thanks!). His comments: "As a workaround, you can replace .* with .{0,N} for some large value of N".