Sophomorix-Filter-Ergänzung

Tw33ki · 4. März 2026 um 14:10

Hallo Zusammen,

wir hatten in der Vergangenheit häufiger das Problem, dass sich verschiedene Umlaute, die wir im Export aus ASV-BW im .csv-Format erhalten hatten, nicht über die Schulkonsole importieren ließen.

Daher haben wir das entsprechende Sophomorix-Filterscript /usr/share/sophomorix/filter/asv-csv.filter dahingehend etwas ergänzt:

#!/usr/bin/perl -w
# This script converts the CSV dump of ASV-BW
# to the students.csv format for linuxmuster.net 7
#
use strict;
use warnings;
use Data::Dumper;

# Dumper settings for the debug dump of the detected header layout
$Data::Dumper::Indent   = 1;
$Data::Dumper::Sortkeys = 1;
$Data::Dumper::Useqq    = 1;
$Data::Dumper::Terse    = 1;

# input file (ASV-BW export) and output file (students.csv) from the command line
my $source=$ARGV[0];
my $target=$ARGV[1];

# stop early with a usage hint instead of failing later on an empty file name
die "Usage: $0 /source/file /target/file\n"
    unless defined $source and defined $target;

# the result is assembled here and only copied to $target after a clean run
my $tmp="/var/lib/sophomorix/tmp/students.csv.filter.tmp-asv-csv.filter";

# unid -> line number of first occurrence (duplicate ID detection)
my %unid_store=();
# complete input line -> line number of first occurrence (duplicate line detection)
my %line_store=();
# number of lines read from the source file
my $linecount=0;
# number of lines written to the filtered file
my $filtercount=0;
# number of header lines recognized in the source file
my $headercount=0;
# header configuration and the column layout detected in the source file
my %header=();

############################################################
# configuration
############################################################
# Fetch data from ASV-BW the following way:
# as described in https://github.com/linuxmuster/sophomorix4/wiki/Export-and-Filters
# call this script with /abs/path/asv-csv.filter  /source/file  /target/file
# your exported file needs headers to work with this filter

# Header names in the analyzed file
# Edit ONLY the last, german names
$header{'known_headers'}{'class'}="Klasse";
$header{'known_headers'}{'sn'}="Familienname";
$header{'known_headers'}{'givenName'}="Vornamen";
$header{'known_headers'}{'birthday'}="Geburtsdatum";
$header{'known_headers'}{'unid'}="Schueler-ID";

# how many ; has your file?
# lines with more or fewer semicolons are treated as suspicious
my $max_configured_semicolons_per_line=15;
my $min_configured_semicolons_per_line=4;

############################################################

# Start the script

############################################################

# Reverse lookup: column title used in the input file -> internal field name
while (my ($field, $title) = each %{ $header{'known_headers'} }) {
    $header{'configured_headers'}{$title} = $field;
}

# Order of the Headers in the filtered file (1-based position per field)
@{ $header{'output'} }{ 'class', 'sn', 'givenName', 'birthday', 'unid' } = (1 .. 5);

# Read the ASV-BW export line by line and write the filtered lines to the
# temporary file. Three-argument open with lexical handles keeps special
# characters in the file names from being interpreted as an open mode.
open(my $source_fh, '<', $source) || die "Error: $! $source not found!";
open(my $tmp_fh, '>', $tmp) || die "Error: $! $tmp not found!";

print {$tmp_fh} "# created by filterscript\n";
print {$tmp_fh} "# $0\n";
print {$tmp_fh} "# @ARGV\n";

while (my $line = <$source_fh>){
    # remove the line ending; exports written on Windows end in CR LF and the
    # CR would otherwise stay attached to the last field
    $line =~ s/\r?\n?\z//;
    $linecount++;

    # a UTF-8 byte order mark in front of the first line would hide the header
    $line =~ s/\A\xEF\xBB\xBF// if $linecount == 1;

    # skip empty lines
    next if $line eq "";

    # split items in line
    my @items = split(/;/, $line);

    # find the header: the first three columns must all be configured titles
    if (@items >= 3
        and exists $header{'configured_headers'}{$items[0]}
        and exists $header{'configured_headers'}{$items[1]}
        and exists $header{'configured_headers'}{$items[2]}) {
        print  "This is the header in LINE $linecount: $line\n";

        # remember the 1-based position of every column title
        my $count = 0;
        foreach my $item (@items){
            $count++;
            $header{'fields_in_input_file'}{$item} = $count;
        }

        # every configured column must be present; without this check a
        # missing title would silently select the last column of each line
        foreach my $field (sort keys %{ $header{'known_headers'} }){
            my $title = $header{'known_headers'}{$field};
            next if exists $header{'fields_in_input_file'}{$title};
            print "\n# ERROR: column $title not found in the header (Line $linecount)#\n\n";
            exit;
        }

        # dump the structure as one reference instead of a flattened list
        print Dumper(\%header);
        $headercount++;
        print {$tmp_fh} "# ".$line."\n";
        next;
    }

    # data lines are only accepted after exactly one header line
    # NOTE(review): the plain exit reports success (status 0) to the caller,
    # here and in the checks below; kept as is, confirm what the caller expects
    if ($headercount != 1){
        print "\n# $headercount header found at the beginning of the file $headercount #\n\n";
        exit;
    }

    # pick the configured fields by the column positions found in the header
    my $known  = $header{'known_headers'};
    my $column = $header{'fields_in_input_file'};
    my $class      = $items[ $column->{ $known->{'class'} } - 1 ];
    my $sn         = $items[ $column->{ $known->{'sn'} } - 1 ];
    my $given_name = $items[ $column->{ $known->{'givenName'} } - 1 ];
    my $birthday   = $items[ $column->{ $known->{'birthday'} } - 1 ];
    my $unid       = $items[ $column->{ $known->{'unid'} } - 1 ];

    # skip classes beginning with _
    next if $class =~ m/^_/;

    # skip classes beginning with "Abgang "
    next if $class =~ m/^Abgang /;

    # display LINE
    print "LINE $linecount:\n";
    print "   $header{'known_headers'}{'class'}: >$class<\n";
    print "   $header{'known_headers'}{'sn'}: >$sn<\n";
    print "   $header{'known_headers'}{'givenName'}: >$given_name<\n";
    print "   $header{'known_headers'}{'birthday'}: >$birthday<\n";
    print "   $header{'known_headers'}{'unid'}: >$unid<\n";

    # exit if line seems suspicious
    my $semicolons_per_line = ($line =~ tr/;//);
    if ($semicolons_per_line < $min_configured_semicolons_per_line or
        $semicolons_per_line > $max_configured_semicolons_per_line ){
        print "\nSemicolons in line $linecount is not correct: $semicolons_per_line\n";
        print "    Allowed: $min_configured_semicolons_per_line to $max_configured_semicolons_per_line\n\n";
        exit;
    }

    # ignore if line is double
    if (exists $line_store{$line}){
        print "\n# WARNING: line $line is double (Line $line_store{$line} and $linecount)#\n\n";
        next;
    }
    $line_store{$line} = $linecount;

    # exit if ID is double
    if (exists $unid_store{$unid}){
        print "$line";
        print "\n# ERROR: unid $unid is double (Line $unid_store{$unid} and $linecount)#\n\n";
        exit;
    }
    $unid_store{$unid} = $linecount;

    # replace umlauts and other special characters in every output field
    $class      = replace_umlaute($class);
    $sn         = replace_umlaute($sn);
    $given_name = replace_umlaute($given_name);
    $birthday   = replace_umlaute($birthday);
    $unid       = replace_umlaute($unid);

    # create line: the five fields, each followed by a semicolon
    $filtercount++;
    my $filtered_line = join(";", $class, $sn, $given_name, $birthday, $unid).";";
    print "  -> Filtered to $filtercount: $filtered_line\n";
    print {$tmp_fh} $filtered_line."\n";
}

close($source_fh);
# buffered write errors only show up when the handle is closed
close($tmp_fh) || die "Error: $! could not write $tmp!";

# if all worked well: copy file to final location

# Only a run with exactly one header line may replace the target file.
if ($headercount != 1){
    print "\n# $headercount header found. Cannot continue without header or more than one header#\n\n";
    exit;
}
print "# 1 Header found. Looks good! #\n";

# List-form system() bypasses the shell, so blanks or shell metacharacters
# in the target path given on the command line cannot alter the command.
system('cp', $tmp, $target) == 0
    or die "Error: could not copy $tmp to $target (cp exit status ".($? >> 8).")\n";

# The temporary file is removed only after the copy succeeded, so a failed
# run leaves the filtered data behind for inspection.
unlink($tmp) or warn "Warning: could not remove $tmp: $!\n";

############################################################

# subs

############################################################

# Strip one surrounding pair of double quotes and, after that, one leading
# and one trailing blank from a field.
#   $string : the field text
# Returns the cleaned copy; the caller's value is not modified.
sub remove_quote {
    my $string = shift;

    # alias the copy to $_ so the four substitutions read as one list
    for ($string) {
        s/^"//g;
        s/"$//g;
        s/^ //g;
        s/ $//g;
    }

    return $string;
}

# Transliterate umlauts and accented letters to plain ASCII.
#   $text : the field text; an undefined value is returned unchanged
# Returns the transliterated copy. Characters without an entry in the table
# are passed through untouched.
# The table literals are matched exactly as they are stored in this file, so
# the input must use the same encoding as the script file.
sub replace_umlaute {
    my ($text) = @_;

    # a missing field has nothing to transliterate
    return $text unless defined $text;

    my %replacements = (
        # German umlauts
        'ä' => 'ae', 'ö' => 'oe', 'ü' => 'ue', 'ß' => 'ss',
        'Ä' => 'Ae', 'Ö' => 'Oe', 'Ü' => 'Ue',
        # French accents
        'é' => 'e', 'è' => 'e', 'ê' => 'e', 'ë' => 'e',
        'É' => 'E', 'È' => 'E', 'Ê' => 'E', 'Ë' => 'E',
        'à' => 'a', 'á' => 'a', 'â' => 'a', 'ã' => 'a', 'å' => 'a',
        'À' => 'A', 'Á' => 'A', 'Â' => 'A', 'Ã' => 'A', 'Å' => 'A',
        'ç' => 'c', 'î' => 'i', 'ï' => 'i', 'ô' => 'o', 'ù' => 'u', 'û' => 'u',
        'Ç' => 'C', 'Î' => 'I', 'Ï' => 'I', 'Ô' => 'O', 'Ù' => 'U', 'Û' => 'U',
        'ÿ' => 'y', 'æ' => 'ae', 'œ' => 'oe',
        'Ÿ' => 'Y', 'Æ' => 'Ae', 'Œ' => 'Oe',
        # Spanish accents
        'ñ' => 'n', 'í' => 'i', 'ó' => 'o', 'ú' => 'u',
        'Ñ' => 'N', 'Í' => 'I', 'Ó' => 'O', 'Ú' => 'U',
        # Italian accents
        'ì' => 'i', 'ò' => 'o',
        'Ì' => 'I', 'Ò' => 'O',
        # Scandinavian characters
        'ø' => 'o',
        'Ø' => 'O',
        # Eastern European characters
        'ć' => 'c', 'č' => 'c', 'š' => 's', 'ž' => 'z', 'đ' => 'd',
        'Ć' => 'C', 'Č' => 'C', 'Š' => 'S', 'Ž' => 'Z', 'Đ' => 'D',
        'ł' => 'l', 'ń' => 'n', 'ś' => 's', 'ź' => 'z',
        'Ł' => 'L', 'Ń' => 'N', 'Ś' => 'S', 'Ź' => 'Z',
        # Turkish characters
        'ş' => 's', 'ğ' => 'g', 'ı' => 'i',
        'Ş' => 'S', 'Ğ' => 'G', 'İ' => 'I',
    );

    # one alternation over all table entries: the text is scanned once
    # instead of once per entry
    my $pattern = join('|', map { quotemeta($_) } keys %replacements);
    $text =~ s/($pattern)/$replacements{$1}/g;

    return $text;
}

Vielleicht hat jemand Verbesserungsvorschläge oder steht irgendwann vor dem gleichen Problem.

Timo