Skip to content

Commit

Permalink
adapt environment scraping
Browse files Browse the repository at this point in the history
  • Loading branch information
paulheinr committed Jul 12, 2024
1 parent b07dfdc commit 2dd9538
Show file tree
Hide file tree
Showing 3 changed files with 135 additions and 2 deletions.
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package org.tub.vsp.bvwp.data.mapper.environmentalCriteria;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.tub.vsp.bvwp.JSoupUtils;
Expand All @@ -10,9 +12,20 @@
import java.util.List;

public class StreetEnvironmentalCriteriaMapper {
public static final Logger logger = LogManager.getLogger(StreetEnvironmentalCriteriaMapper.class);

public static StreetEnvironmentalDataContainer mapDocument(Document document) {
StreetEnvironmentalDataContainer result = new StreetEnvironmentalDataContainer();

boolean exclude = document.select("p").stream().anyMatch(p -> p.text()
.contains("Die umweltfachliche Beurteilung von Knotenpunkten erfolgt in Form" +
" einer qualitativen Einschätzung."));

if (exclude) {
logger.warn("Excluding environmental criteria for this project");
return null;
}

Element envTable = JSoupUtils.getTableByCssKeyAndPredicate(document, "table.table_webprins",
StreetEnvironmentalCriteriaMapper::isEnvironmentalCriteriaTable)
.orElseThrow();
Expand Down Expand Up @@ -57,7 +70,7 @@ private static EnvironmentalCriteria extractWithSubEnvironmentalCriteria(Element
}

private static EnvironmentalCriteria.UmweltBewertung handleBewertung(String bewertung) {
if (bewertung.equals("-")) {
if (bewertung.equals("-") || bewertung.isBlank()) {
return null;
}
return EnvironmentalCriteria.UmweltBewertung.valueOf(bewertung.toUpperCase());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,6 @@ public record Description(double absolute, double betroffenheit) {
}

/**
 * Rating values of an environmental criterion.
 * NOTE(review): the constant names presumably mirror the upper-cased rating text of the scraped
 * project pages (they are resolved via {@code valueOf}) - confirm before renaming any of them.
 */
public enum UmweltBewertung {
    HOCH, MITTEL, GERING, PLANFESTGESTELLT
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
package org.tub.vsp.bvwp.users.ph;

import org.apache.logging.log4j.Logger;
import org.tub.vsp.bvwp.data.container.analysis.StreetAnalysisDataContainer;
import org.tub.vsp.bvwp.data.container.base.street.StreetBaseDataContainer;
import org.tub.vsp.bvwp.data.container.base.street.StreetEnvironmentalDataContainer;
import org.tub.vsp.bvwp.data.type.EnvironmentalCriteria;
import org.tub.vsp.bvwp.scraping.StreetScraper;

import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.Optional;

/**
 * Runnable helper that exports the environmental criteria of the locally stored street projects
 * to a CSV file.
 *
 * <p>Each project becomes one row: the project number followed by the values of the criteria
 * numbered 21 to 29 in the header. Cells are joined with a plain comma; no quoting or escaping
 * is applied.
 */
public class RunEnvironmentalCsvWriting {
    public static final Logger logger = org.apache.logging.log4j.LogManager.getLogger(RunEnvironmentalCsvWriting.class);

    /**
     * Scrapes the local street data and writes {@code output/env_street_data.csv}.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // NOTE(review): the meaning of the scraper arguments and of the constant 0 passed to
        // StreetAnalysisDataContainer is not visible here - confirm against those classes.
        List<StreetAnalysisDataContainer> allStreetBaseData =
                new StreetScraper().extractAllLocalBaseData("./data/street/all", "A", ".*", "")
                                   .stream()
                                   .map(s -> new StreetAnalysisDataContainer(s, 0))
                                   .toList();

        writeCsv(allStreetBaseData, "output/env_street_data.csv");
    }

    /**
     * Builds all rows first and then writes them to {@code fileName}, one comma-joined row per line.
     * An I/O failure is logged and not rethrown.
     *
     * @param streetAnalysisDataContainers projects to export
     * @param fileName                     path of the CSV file to (over)write
     */
    private static void writeCsv(List<StreetAnalysisDataContainer> streetAnalysisDataContainers, String fileName) {
        // Rows are assembled before the file is opened, so a failure while building a row
        // does not leave a partially written file behind.
        List<List<String>> csvData = new ArrayList<>();
        csvData.add(getHeaders());
        for (StreetAnalysisDataContainer streetAnalysisDataContainer : streetAnalysisDataContainers) {
            csvData.add(getEntry(streetAnalysisDataContainer.getStreetBaseDataContainer()));
        }

        try (BufferedWriter writer = new BufferedWriter(new FileWriter(fileName))) {
            for (List<String> row : csvData) {
                // String.join renders null cells as the literal text "null".
                writer.write(String.join(",", row));
                writer.newLine();
            }
        } catch (IOException e) {
            // Report through the logger (with the target file) instead of printing to stderr.
            logger.error("Could not write CSV file {}", fileName, e);
        }
    }

    /**
     * Creates the header row: {@code name} followed by the columns of criteria 21 to 29.
     * Criteria 22 and 24 have a rating column followed by 2 and 4 numbered sub-entries
     * respectively; every other criterion has the columns abs, betroffenheit, bewertung.
     *
     * @return a new mutable list with the header cells
     */
    private static List<String> getHeaders() {
        List<String> headers = new ArrayList<>();
        headers.add("name");
        for (int i = 21; i < 30; i++) {
            int subEntries = subEntryCount(i);
            if (subEntries == 0) {
                headers.add(i + " abs");
                headers.add(i + " betroffenheit");
                headers.add(i + " bewertung");
                continue;
            }
            headers.add(i + " bewertung");
            for (int j = 1; j <= subEntries; j++) {
                headers.add(i + " " + j + ") abs");
                headers.add(i + " " + j + ") betroffenheit");
            }
        }
        return headers;
    }

    /** Returns the number of numbered sub-entries of a criterion, or 0 if it has none. */
    private static int subEntryCount(int criterionNumber) {
        if (criterionNumber == 22) {
            return 2;
        }
        if (criterionNumber == 24) {
            return 4;
        }
        return 0;
    }

    /**
     * Creates the data row of one project.
     *
     * @param streetBaseDataContainer project to export
     * @return the project number followed by the criteria cells; when the project has no
     *         environmental criteria, the project number followed by {@code null} cells that
     *         pad the row to the header width
     */
    private static List<String> getEntry(StreetBaseDataContainer streetBaseDataContainer) {
        List<String> entries = new ArrayList<>();
        String projectNumber = streetBaseDataContainer.getProjectInformation().getProjectNumber();

        logger.info("While CSV writing. Processing project: {}", projectNumber);

        entries.add(projectNumber);

        StreetEnvironmentalDataContainer envCrit = streetBaseDataContainer.getEnvironmentalCriteria();

        if (envCrit == null) {
            logger.info("No environmental criteria.");
            // Every column except "name" stays empty; compute the count once instead of
            // rebuilding the header list on each iteration.
            int emptyCells = getHeaders().size() - 1;
            for (int i = 0; i < emptyCells; i++) {
                entries.add(null);
            }
            return entries;
        }

        // The order must match the column order produced by getHeaders().
        addEntries(entries, envCrit.getNaturschutzVorrangflaechen21());
        addEntries(entries, envCrit.getNatura2000Gebiete22());
        addEntries(entries, envCrit.getUnzerschnitteneKernraeume23());
        addEntries(entries, envCrit.getUnzerschnitteneGrossraeume24());
        addEntries(entries, envCrit.getFlaechenInanspruchnahme25());
        addEntries(entries, envCrit.getUeberschwemmungsgebiete26());
        addEntries(entries, envCrit.getWasserschutzgebiete27());
        addEntries(entries, envCrit.getVerkehrsarmeRaeume28());
        addEntries(entries, envCrit.getKulturLandschaftsschutz29());

        return entries;
    }

    /**
     * Appends the cells of one criterion to {@code result}.
     * With more than one description the rating comes first, followed by the absolute and
     * betroffenheit value of every description; otherwise the single description's two values
     * come first and the rating last.
     *
     * @param result   row being built
     * @param criteria criterion to append; must not be {@code null}
     * @throws NullPointerException             if {@code criteria} is {@code null}
     * @throws java.util.NoSuchElementException if the criterion has no description at all
     */
    private static void addEntries(List<String> result, EnvironmentalCriteria criteria) {
        Objects.requireNonNull(criteria);

        // A missing rating stays null.
        String bewertung = Optional.ofNullable(criteria.bewertung()).map(Enum::toString).orElse(null);

        if (criteria.description().size() > 1) {
            result.add(bewertung);
            for (EnvironmentalCriteria.Description description : criteria.description()) {
                result.add(String.valueOf(description.absolute()));
                result.add(String.valueOf(description.betroffenheit()));
            }
        } else {
            EnvironmentalCriteria.Description only = criteria.description().getFirst();
            result.add(String.valueOf(only.absolute()));
            result.add(String.valueOf(only.betroffenheit()));
            result.add(bewertung);
        }
    }
}

0 comments on commit 2dd9538

Please sign in to comment.